From: Jacob Lifshay <programmerjake@gmail.com>
Date: Wed, 22 Jul 2020 22:18:03 +0000 (-0700)
Subject: format code
X-Git-Tag: semi_working_ecp5~606
X-Git-Url: https://git.libre-soc.org/?p=soc.git;a=commitdiff_plain;h=575802fa56d7175ebbdc16bb5c493b556dab9c74

format code
---

diff --git a/src/soc/bus/test/test_minerva.py b/src/soc/bus/test/test_minerva.py
index 2bb920a3..8e581b5e 100644
--- a/src/soc/bus/test/test_minerva.py
+++ b/src/soc/bus/test/test_minerva.py
@@ -9,11 +9,11 @@ class TestSRAMBareLoadStoreUnit(BareLoadStoreUnit):
         super().__init__(pspec)
         # small 32-entry Memory
         if (hasattr(pspec, "dmem_test_depth") and
-            isinstance(pspec.dmem_test_depth, int)):
+                isinstance(pspec.dmem_test_depth, int)):
             depth = pspec.dmem_test_depth
         else:
             depth = 32
-        print ("TestSRAMBareLoadStoreUnit depth", depth)
+        print("TestSRAMBareLoadStoreUnit depth", depth)
 
         self.mem = Memory(width=self.data_wid, depth=depth)
 
@@ -29,8 +29,8 @@ class TestSRAMBareLoadStoreUnit(BareLoadStoreUnit):
         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
         fanins = ['dat_r', 'ack', 'err']
         for fanout in fanouts:
-            print ("fanout", fanout, getattr(sram.bus, fanout).shape(),
-                                     getattr(dbus, fanout).shape())
+            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
+                  getattr(dbus, fanout).shape())
             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
             comb += getattr(sram.bus, fanout).eq(getattr(dbus, fanout))
         for fanin in fanins:
@@ -46,11 +46,11 @@ class TestSRAMBareFetchUnit(BareFetchUnit):
         super().__init__(pspec)
         # default: small 32-entry Memory
         if (hasattr(pspec, "imem_test_depth") and
-            isinstance(pspec.imem_test_depth, int)):
+                isinstance(pspec.imem_test_depth, int)):
             depth = pspec.imem_test_depth
         else:
             depth = 32
-        print ("TestSRAMBareFetchUnit depth", depth)
+        print("TestSRAMBareFetchUnit depth", depth)
         self.mem = Memory(width=self.data_wid, depth=depth)
 
     def _get_memory(self):
@@ -68,8 +68,8 @@ class TestSRAMBareFetchUnit(BareFetchUnit):
         fanouts = ['dat_w', 'sel', 'cyc', 'stb', 'we', 'cti', 'bte']
         fanins = ['dat_r', 'ack', 'err']
         for fanout in fanouts:
-            print ("fanout", fanout, getattr(sram.bus, fanout).shape(),
-                                     getattr(ibus, fanout).shape())
+            print("fanout", fanout, getattr(sram.bus, fanout).shape(),
+                  getattr(ibus, fanout).shape())
             comb += getattr(sram.bus, fanout).eq(getattr(ibus, fanout))
             comb += getattr(sram.bus, fanout).eq(getattr(ibus, fanout))
         for fanin in fanins:
diff --git a/src/soc/config/test/test_fetch.py b/src/soc/config/test/test_fetch.py
index f6f0901a..df9caf68 100644
--- a/src/soc/config/test/test_fetch.py
+++ b/src/soc/config/test/test_fetch.py
@@ -12,6 +12,7 @@ from soc.config.test.test_loadstore import TestMemPspec
 import sys
 sys.setrecursionlimit(10**6)
 
+
 def read_from_addr(dut, addr):
     yield dut.a_pc_i.eq(addr)
     yield dut.a_valid_i.eq(1)
@@ -35,11 +36,11 @@ def tst_lsmemtype(ifacetype, sram_depth=32):
     m = Module()
     pspec = TestMemPspec(ldst_ifacetype=ifacetype,
                          imem_ifacetype=ifacetype, addr_wid=64,
-                                                   mask_wid=4,
-                                                   reg_wid=32,
+                         mask_wid=4,
+                         reg_wid=32,
                          imem_test_depth=sram_depth)
     dut = ConfigFetchUnit(pspec).fu
-    vl = rtlil.convert(dut, ports=[]) # TODOdut.ports())
+    vl = rtlil.convert(dut, ports=[])  # TODOdut.ports())
     with open("test_fetch_%s.il" % ifacetype, "w") as f:
         f.write(vl)
 
@@ -52,20 +53,21 @@ def tst_lsmemtype(ifacetype, sram_depth=32):
 
     def process():
 
-        values = [random.randint(0, (1<<32)-1) for x in range(16)]
+        values = [random.randint(0, (1 << 32)-1) for x in range(16)]
         for addr, val in enumerate(values):
             yield mem._array[addr].eq(val)
         yield Settle()
 
         for addr, val in enumerate(values):
             x = yield from read_from_addr(dut, addr << 2)
-            print ("addr, val", addr, hex(val), hex(x))
+            print("addr, val", addr, hex(val), hex(x))
             assert x == val
 
     sim.add_sync_process(process)
     with sim.write_vcd("test_fetch_%s.vcd" % ifacetype, traces=[]):
         sim.run()
 
+
 if __name__ == '__main__':
     tst_lsmemtype('test_bare_wb', sram_depth=32768)
     tst_lsmemtype('testmem')
diff --git a/src/soc/config/test/test_loadstore.py b/src/soc/config/test/test_loadstore.py
index 8d906c70..aab4d969 100644
--- a/src/soc/config/test/test_loadstore.py
+++ b/src/soc/config/test/test_loadstore.py
@@ -8,7 +8,7 @@ from collections import namedtuple
 from nmigen.cli import rtlil
 from unittest.mock import Mock
 
-TestMemPspec = Mock # might as well use Mock, it does the job
+TestMemPspec = Mock  # might as well use Mock, it does the job
 
 
 def write_to_addr(dut, addr, value):
@@ -21,7 +21,7 @@ def write_to_addr(dut, addr, value):
     yield dut.m_valid_i.eq(1)
     yield
     yield
-    
+
     yield dut.x_stall_i.eq(0)
     yield
     yield dut.x_st_i.eq(0)
@@ -51,7 +51,7 @@ def write_byte(dut, addr, val):
     yield dut.x_st_data_i.eq(val << (offset * 8))
     yield dut.x_st_i.eq(1)
     yield dut.x_mask_i.eq(1 << offset)
-    print ("write_byte", addr, bin(1<<offset), hex(val<<(offset*8)))
+    print("write_byte", addr, bin(1 << offset), hex(val << (offset*8)))
     yield dut.x_valid_i.eq(1)
     yield dut.m_valid_i.eq(1)
 
@@ -73,18 +73,18 @@ def read_byte(dut, addr):
         yield
     assert (yield dut.x_valid_i)
     val = (yield dut.m_ld_data_o)
-    print ("read_byte", addr, offset, hex(val))
+    print("read_byte", addr, offset, hex(val))
     return (val >> (offset * 8)) & 0xff
 
 
 def tst_lsmemtype(ifacetype):
     m = Module()
-    pspec = TestMemPspec(ldst_ifacetype=ifacetype, 
-                         imem_ifacetype=''       , addr_wid=64,
-                                                   mask_wid=4,
-                                                   reg_wid=32)
+    pspec = TestMemPspec(ldst_ifacetype=ifacetype,
+                         imem_ifacetype='', addr_wid=64,
+                         mask_wid=4,
+                         reg_wid=32)
     dut = ConfigLoadStoreUnit(pspec).lsi
-    vl = rtlil.convert(dut, ports=[]) # TODOdut.ports())
+    vl = rtlil.convert(dut, ports=[])  # TODOdut.ports())
     with open("test_loadstore_%s.il" % ifacetype, "w") as f:
         f.write(vl)
 
@@ -99,23 +99,24 @@ def tst_lsmemtype(ifacetype):
         for addr, val in enumerate(values):
             yield from write_byte(dut, addr, val)
             x = yield from read_from_addr(dut, addr << 2)
-            print ("addr, val", addr, hex(val), hex(x))
+            print("addr, val", addr, hex(val), hex(x))
             x = yield from read_byte(dut, addr)
-            print ("addr, val", addr, hex(val), hex(x))
+            print("addr, val", addr, hex(val), hex(x))
             assert x == val
 
-        values = [random.randint(0, (1<<32)-1) for x in range(16)]
+        values = [random.randint(0, (1 << 32)-1) for x in range(16)]
 
         for addr, val in enumerate(values):
             yield from write_to_addr(dut, addr << 2, val)
             x = yield from read_from_addr(dut, addr << 2)
-            print ("addr, val", addr, hex(val), hex(x))
+            print("addr, val", addr, hex(val), hex(x))
             assert x == val
 
     sim.add_sync_process(process)
     with sim.write_vcd("test_loadstore_%s.vcd" % ifacetype, traces=[]):
         sim.run()
 
+
 if __name__ == '__main__':
     tst_lsmemtype('test_bare_wb')
     tst_lsmemtype('testmem')
diff --git a/src/soc/decoder/helpers.py b/src/soc/decoder/helpers.py
index 40ddb088..17534800 100644
--- a/src/soc/decoder/helpers.py
+++ b/src/soc/decoder/helpers.py
@@ -15,6 +15,7 @@ Links:
 * https://bugs.libre-soc.org/show_bug.cgi?id=324 - add trunc_div and trunc_rem
 """
 
+
 def exts(value, bits):
     sign = 1 << (bits - 1)
     return (value & (sign - 1)) - (value & sign)
@@ -26,6 +27,7 @@ def EXTS(value):
     assert isinstance(value, SelectableInt)
     return SelectableInt(exts(value.value, value.bits) & ((1 << 256)-1), 256)
 
+
 def EXTS64(value):
     """ extends sign bit out from current MSB to 64 bits
     """
@@ -35,10 +37,10 @@ def EXTS64(value):
 
 # signed version of MUL
 def MULS(a, b):
-    a_s = a.value & (1<<(a.bits-1)) != 0
-    b_s = b.value & (1<<(b.bits-1)) != 0
+    a_s = a.value & (1 << (a.bits-1)) != 0
+    b_s = b.value & (1 << (b.bits-1)) != 0
     result = abs(a) * abs(b)
-    print ("MULS", result, a_s, b_s)
+    print("MULS", result, a_s, b_s)
     if a_s == b_s:
         return result
     return -result
@@ -48,7 +50,7 @@ def MULS(a, b):
 def EXTZ64(value):
     if isinstance(value, SelectableInt):
         value = value.value
-    return SelectableInt(value & ((1<<32)-1), 64)
+    return SelectableInt(value & ((1 << 32)-1), 64)
 
 
 def rotl(value, bits, wordlen):
@@ -88,24 +90,31 @@ def MASK(x, y):
         mask_b = (~((1 << y) - 1)) & ((1 << 64) - 1)
     return mask_a ^ mask_b
 
+
 def ne(a, b):
     return onebit(a != b)
 
+
 def eq(a, b):
     return onebit(a == b)
 
+
 def gt(a, b):
     return onebit(a > b)
 
+
 def ge(a, b):
     return onebit(a >= b)
 
+
 def lt(a, b):
     return onebit(a < b)
 
+
 def le(a, b):
     return onebit(a <= b)
 
+
 def length(a):
     return len(a)
 
@@ -115,6 +124,7 @@ def length(a):
 # set the shift equal to 0 and passed in a value of all ones, the
 # result I got would be exactly the same as the output of MASK()
 
+
 class HelperTests(unittest.TestCase):
     def test_MASK(self):
         # Verified using rlwinm, rldicl, rldicr in qemu
@@ -187,5 +197,5 @@ class HelperTests(unittest.TestCase):
 
 
 if __name__ == '__main__':
-    print (SelectableInt.__bases__)
+    print(SelectableInt.__bases__)
     unittest.main()
diff --git a/src/soc/decoder/isa/caller.py b/src/soc/decoder/isa/caller.py
index 9d3008ba..1dd24aeb 100644
--- a/src/soc/decoder/isa/caller.py
+++ b/src/soc/decoder/isa/caller.py
@@ -17,14 +17,14 @@ from soc.decoder.selectable_int import (FieldSelectableInt, SelectableInt,
 from soc.decoder.power_enums import (spr_dict, spr_byname, XER_bits,
                                      insns, MicrOp)
 from soc.decoder.helpers import exts
-from soc.consts import PIb, MSRb # big-endian (PowerISA versions)
+from soc.consts import PIb, MSRb  # big-endian (PowerISA versions)
 
 from collections import namedtuple
 import math
 import sys
 
 instruction_info = namedtuple('instruction_info',
-                              'func read_regs uninit_regs write_regs ' + \
+                              'func read_regs uninit_regs write_regs ' +
                               'special_regs op_fields form asmregs')
 
 special_sprs = {
@@ -57,7 +57,7 @@ class Mem:
         self.mem = {}
         self.bytes_per_word = row_bytes
         self.word_log2 = math.ceil(math.log2(row_bytes))
-        print ("Sim-Mem", initial_mem, self.bytes_per_word, self.word_log2)
+        print("Sim-Mem", initial_mem, self.bytes_per_word, self.word_log2)
         if not initial_mem:
             return
 
@@ -81,7 +81,7 @@ class Mem:
         # BE/LE mode?
         shifter = remainder * 8
         mask = (1 << (wid * 8)) - 1
-        print ("width,rem,shift,mask", wid, remainder, hex(shifter), hex(mask))
+        print("width,rem,shift,mask", wid, remainder, hex(shifter), hex(mask))
         return shifter, mask
 
     # TODO: Implement ld/st of lesser width
@@ -100,7 +100,7 @@ class Mem:
 
         if width != self.bytes_per_word:
             shifter, mask = self._get_shifter_mask(width, remainder)
-            print ("masking", hex(val), hex(mask<<shifter), shifter)
+            print("masking", hex(val), hex(mask << shifter), shifter)
             val = val & (mask << shifter)
             val >>= shifter
         if swap:
@@ -113,7 +113,7 @@ class Mem:
         remainder = addr & (self.bytes_per_word - 1)
         addr = addr >> self.word_log2
         print("Writing 0x{:x} to ST 0x{:x} memaddr 0x{:x}/{:x}".format(v,
-                        staddr, addr, remainder, swap))
+                                                                       staddr, addr, remainder, swap))
         assert remainder & (width - 1) == 0, "Unaligned access unsupported!"
         if swap:
             v = swap_order(v, width)
@@ -132,11 +132,11 @@ class Mem:
 
     def __call__(self, addr, sz):
         val = self.ld(addr.value, sz)
-        print ("memread", addr, sz, val)
+        print("memread", addr, sz, val)
         return SelectableInt(val, sz*8)
 
     def memassign(self, addr, sz, val):
-        print ("memassign", addr, sz, val)
+        print("memassign", addr, sz, val)
         self.st(addr.value, val.value, sz)
 
 
@@ -154,7 +154,7 @@ class GPR(dict):
         self.form = form
 
     def getz(self, rnum):
-        #rnum = rnum.value # only SelectableInt allowed
+        # rnum = rnum.value # only SelectableInt allowed
         print("GPR getzero", rnum)
         if rnum == 0:
             return SelectableInt(0, 64)
@@ -178,6 +178,7 @@ class GPR(dict):
             s = ' '.join(s)
             print("reg", "%2d" % i, s)
 
+
 class PC:
     def __init__(self, pc_init=0):
         self.CIA = SelectableInt(pc_init, 64)
@@ -207,8 +208,8 @@ class SPR(dict):
             self[key] = v
 
     def __getitem__(self, key):
-        print ("get spr", key)
-        print ("dict", self.items())
+        print("get spr", key)
+        print("dict", self.items())
         # if key in special_sprs get the special spr, otherwise return key
         if isinstance(key, SelectableInt):
             key = key.value
@@ -224,7 +225,7 @@ class SPR(dict):
                 info = spr_byname[key]
             dict.__setitem__(self, key, SelectableInt(0, info.length))
             res = dict.__getitem__(self, key)
-        print ("spr returning", key, res)
+        print("spr returning", key, res)
         return res
 
     def __setitem__(self, key, value):
@@ -232,9 +233,9 @@ class SPR(dict):
             key = key.value
         if isinstance(key, int):
             key = spr_dict[key].SPR
-            print ("spr key", key)
+            print("spr key", key)
         key = special_sprs.get(key, key)
-        print ("setting spr", key, value)
+        print("setting spr", key, value)
         dict.__setitem__(self, key, value)
 
     def __call__(self, ridx):
@@ -247,11 +248,11 @@ class ISACaller:
     # initial_{etc} - initial values for SPRs, Condition Register, Mem, MSR
     # respect_pc - tracks the program counter.  requires initial_insns
     def __init__(self, decoder2, regfile, initial_sprs=None, initial_cr=0,
-                       initial_mem=None, initial_msr=0,
-                       initial_insns=None, respect_pc=False,
-                       disassembly=None,
-                       initial_pc=0,
-                       bigendian=False):
+                 initial_mem=None, initial_msr=0,
+                 initial_insns=None, respect_pc=False,
+                 disassembly=None,
+                 initial_pc=0,
+                 bigendian=False):
 
         self.bigendian = bigendian
         self.halted = False
@@ -264,8 +265,8 @@ class ISACaller:
             initial_insns = {}
             assert self.respect_pc == False, "instructions required to honor pc"
 
-        print ("ISACaller insns", respect_pc, initial_insns, disassembly)
-        print ("ISACaller initial_msr", initial_msr)
+        print("ISACaller insns", respect_pc, initial_insns, disassembly)
+        print("ISACaller initial_msr", initial_msr)
 
         # "fake program counter" mode (for unit testing)
         self.fake_pc = 0
@@ -289,7 +290,7 @@ class ISACaller:
         self.imem = Mem(row_bytes=4, initial_mem=initial_insns)
         self.pc = PC()
         self.spr = SPR(decoder2, initial_sprs)
-        self.msr = SelectableInt(initial_msr, 64) # underlying reg
+        self.msr = SelectableInt(initial_msr, 64)  # underlying reg
 
         # TODO, needed here:
         # FPR (same as GPR except for FP nums)
@@ -305,26 +306,26 @@ class ISACaller:
         # 3.2.3 p46 p232 VRSAVE (actually SPR #256)
 
         # create CR then allow portions of it to be "selectable" (below)
-        self._cr = SelectableInt(initial_cr, 64) # underlying reg
-        self.cr = FieldSelectableInt(self._cr, list(range(32,64)))
+        self._cr = SelectableInt(initial_cr, 64)  # underlying reg
+        self.cr = FieldSelectableInt(self._cr, list(range(32, 64)))
 
         # "undefined", just set to variable-bit-width int (use exts "max")
-        self.undefined = SelectableInt(0, 256) # TODO, not hard-code 256!
+        self.undefined = SelectableInt(0, 256)  # TODO, not hard-code 256!
 
         self.namespace = {}
         self.namespace.update(self.spr)
         self.namespace.update({'GPR': self.gpr,
-                          'MEM': self.mem,
-                          'SPR': self.spr,
-                          'memassign': self.memassign,
-                          'NIA': self.pc.NIA,
-                          'CIA': self.pc.CIA,
-                          'CR': self.cr,
-                          'MSR': self.msr,
-                          'undefined': self.undefined,
-                          'mode_is_64bit': True,
-                          'SO': XER_bits['SO']
-                          })
+                               'MEM': self.mem,
+                               'SPR': self.spr,
+                               'memassign': self.memassign,
+                               'NIA': self.pc.NIA,
+                               'CIA': self.pc.CIA,
+                               'CR': self.cr,
+                               'MSR': self.msr,
+                               'undefined': self.undefined,
+                               'mode_is_64bit': True,
+                               'SO': XER_bits['SO']
+                               })
 
         # update pc to requested start point
         self.set_pc(initial_pc)
@@ -332,7 +333,7 @@ class ISACaller:
         # field-selectable versions of Condition Register TODO check bitranges?
         self.crl = []
         for i in range(8):
-            bits = tuple(range(i*4, (i+1)*4))# errr... maybe?
+            bits = tuple(range(i*4, (i+1)*4))  # errr... maybe?
             _cr = FieldSelectableInt(self.cr, bits)
             self.crl.append(_cr)
             self.namespace["CR%d" % i] = _cr
@@ -341,13 +342,13 @@ class ISACaller:
         self.dec2 = decoder2
 
     def TRAP(self, trap_addr=0x700, trap_bit=PIb.TRAP):
-        print ("TRAP:", hex(trap_addr), hex(self.namespace['MSR'].value))
+        print("TRAP:", hex(trap_addr), hex(self.namespace['MSR'].value))
         # store CIA(+4?) in SRR0, set NIA to 0x700
         # store MSR in SRR1, set MSR to um errr something, have to check spec
         self.spr['SRR0'].value = self.pc.CIA.value
         self.spr['SRR1'].value = self.namespace['MSR'].value
         self.trap_nia = SelectableInt(trap_addr, 64)
-        self.spr['SRR1'][trap_bit] = 1 # change *copy* of MSR in SRR1
+        self.spr['SRR1'][trap_bit] = 1  # change *copy* of MSR in SRR1
 
         # set exception bits.  TODO: this should, based on the address
         # in figure 66 p1065 V3.0B and the table figure 65 p1063 set these
@@ -406,14 +407,14 @@ class ISACaller:
             imm = yield self.dec2.e.do.imm_data.data
             inputs.append(SelectableInt(imm, 64))
         assert len(outputs) >= 1
-        print ("outputs", repr(outputs))
+        print("outputs", repr(outputs))
         if isinstance(outputs, list) or isinstance(outputs, tuple):
             output = outputs[0]
         else:
             output = outputs
         gts = []
         for x in inputs:
-            print ("gt input", x, output)
+            print("gt input", x, output)
             gt = (x > output)
             gts.append(gt)
         print(gts)
@@ -421,11 +422,11 @@ class ISACaller:
         if not (1 & already_done):
             self.spr['XER'][XER_bits['CA']] = cy
 
-        print ("inputs", inputs)
+        print("inputs", inputs)
         # 32 bit carry
         gts = []
         for x in inputs:
-            print ("input", x, output)
+            print("input", x, output)
             gt = (x[32:64] > output[32:64]) == SelectableInt(1, 1)
             gts.append(gt)
         cy32 = 1 if any(gts) else 0
@@ -442,7 +443,7 @@ class ISACaller:
             imm = yield self.dec2.e.do.imm_data.data
             inputs.append(SelectableInt(imm, 64))
         assert len(outputs) >= 1
-        print ("handle_overflow", inputs, outputs, div_overflow)
+        print("handle_overflow", inputs, outputs, div_overflow)
         if len(inputs) < 2 and div_overflow is None:
             return
 
@@ -474,19 +475,19 @@ class ISACaller:
 
     def handle_comparison(self, outputs):
         out = outputs[0]
-        print ("handle_comparison", out.bits, hex(out.value))
+        print("handle_comparison", out.bits, hex(out.value))
         # TODO - XXX *processor* in 32-bit mode
         # https://bugs.libre-soc.org/show_bug.cgi?id=424
-        #if is_32bit:
+        # if is_32bit:
         #    o32 = exts(out.value, 32)
         #    print ("handle_comparison exts 32 bit", hex(o32))
         out = exts(out.value, out.bits)
-        print ("handle_comparison exts", hex(out))
+        print("handle_comparison exts", hex(out))
         zero = SelectableInt(out == 0, 1)
         positive = SelectableInt(out > 0, 1)
         negative = SelectableInt(out < 0, 1)
         SO = self.spr['XER'][XER_bits['SO']]
-        print ("handle_comparison SO", SO)
+        print("handle_comparison SO", SO)
         cr_field = selectconcat(negative, positive, zero, SO)
         self.crl[0].eq(cr_field)
 
@@ -506,7 +507,7 @@ class ISACaller:
         if ins is None:
             raise KeyError("no instruction at 0x%x" % pc)
         print("setup: 0x%x 0x%x %s" % (pc, ins & 0xffffffff, bin(ins)))
-        print ("CIA NIA", self.respect_pc, self.pc.CIA.value, self.pc.NIA.value)
+        print("CIA NIA", self.respect_pc, self.pc.CIA.value, self.pc.NIA.value)
 
         yield self.dec2.dec.raw_opcode_in.eq(ins & 0xffffffff)
         yield self.dec2.dec.bigendian.eq(self.bigendian)
@@ -524,13 +525,13 @@ class ISACaller:
 
         if not self.respect_pc:
             self.fake_pc += 4
-        print ("execute one, CIA NIA", self.pc.CIA.value, self.pc.NIA.value)
+        print("execute one, CIA NIA", self.pc.CIA.value, self.pc.NIA.value)
 
     def get_assembly_name(self):
         # TODO, asmregs is from the spec, e.g. add RT,RA,RB
         # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
         asmcode = yield self.dec2.dec.op.asmcode
-        print ("get assembly name asmcode", asmcode)
+        print("get assembly name asmcode", asmcode)
         asmop = insns.get(asmcode, None)
         int_op = yield self.dec2.dec.op.internal_op
 
@@ -540,9 +541,9 @@ class ISACaller:
         rc_en = yield self.dec2.e.do.rc.data
         rc_ok = yield self.dec2.e.do.rc.ok
         # grrrr have to special-case MUL op (see DecodeOE)
-        print ("ov en rc en", ov_ok, ov_en, rc_ok, rc_en, int_op)
+        print("ov en rc en", ov_ok, ov_en, rc_ok, rc_en, int_op)
         if int_op in [MicrOp.OP_MUL_H64.value, MicrOp.OP_MUL_H32.value]:
-            print ("mul op")
+            print("mul op")
             if rc_en & rc_ok:
                 asmop += "."
         else:
@@ -551,10 +552,10 @@ class ISACaller:
         lk = yield self.dec2.e.do.lk
         if lk:
             asmop += "l"
-        print ("int_op", int_op)
+        print("int_op", int_op)
         if int_op in [MicrOp.OP_B.value, MicrOp.OP_BC.value]:
             AA = yield self.dec2.dec.fields.FormI.AA[0:-1]
-            print ("AA", AA)
+            print("AA", AA)
             if AA:
                 asmop += "a"
         spr_msb = yield from self.get_spr_msb()
@@ -574,18 +575,18 @@ class ISACaller:
 
     def get_spr_msb(self):
         dec_insn = yield self.dec2.e.do.insn
-        return dec_insn & (1<<20) != 0 # sigh - XFF.spr[-1]?
+        return dec_insn & (1 << 20) != 0  # sigh - XFF.spr[-1]?
 
     def call(self, name):
-        name = name.strip() # remove spaces if not already done so
+        name = name.strip()  # remove spaces if not already done so
         if self.halted:
-            print ("halted - not executing", name)
+            print("halted - not executing", name)
             return
 
         # TODO, asmregs is from the spec, e.g. add RT,RA,RB
         # see http://bugs.libre-riscv.org/show_bug.cgi?id=282
         asmop = yield from self.get_assembly_name()
-        print  ("call", name, asmop)
+        print("call", name, asmop)
 
         # check privileged
         int_op = yield self.dec2.dec.op.internal_op
@@ -603,8 +604,8 @@ class ISACaller:
                       MicrOp.OP_MTSPR.value] and spr_msb:
             instr_is_privileged = True
 
-        print ("is priv", instr_is_privileged, hex(self.msr.value),
-                          self.msr[MSRb.PR])
+        print("is priv", instr_is_privileged, hex(self.msr.value),
+              self.msr[MSRb.PR])
         # check MSR priv bit and whether op is privileged: if so, throw trap
         if instr_is_privileged and self.msr[MSRb.PR] == 1:
             self.TRAP(0x700, PIb.PRIV)
@@ -626,8 +627,8 @@ class ISACaller:
             self.TRAP(0x700, PIb.ILLEG)
             self.namespace['NIA'] = self.trap_nia
             self.pc.update(self.namespace)
-            print ("name %s != %s - calling ILLEGAL trap, PC: %x" % \
-                    (name, asmop, self.pc.CIA.value))
+            print("name %s != %s - calling ILLEGAL trap, PC: %x" %
+                  (name, asmop, self.pc.CIA.value))
             return
 
         info = self.instrs[name]
@@ -666,7 +667,7 @@ class ISACaller:
         if self.trap_nia is not None:
             self.namespace['NIA'] = self.trap_nia
 
-        print ("after func", self.namespace['CIA'], self.namespace['NIA'])
+        print("after func", self.namespace['CIA'], self.namespace['NIA'])
 
         # detect if CA/CA32 already in outputs (sra*, basically)
         already_done = 0
@@ -678,7 +679,7 @@ class ISACaller:
                 if name == 'CA32':
                     already_done |= 2
 
-        print ("carry already done?", bin(already_done))
+        print("carry already done?", bin(already_done))
         carry_en = yield self.dec2.e.do.output_carry
         if carry_en:
             yield from self.handle_carry_(inputs, results, already_done)
@@ -692,7 +693,7 @@ class ISACaller:
 
         ov_en = yield self.dec2.e.do.oe.oe
         ov_ok = yield self.dec2.e.do.oe.ok
-        print ("internal overflow", overflow, ov_en, ov_ok)
+        print("internal overflow", overflow, ov_en, ov_ok)
         if ov_en & ov_ok:
             yield from self.handle_overflow(inputs, results, overflow)
 
@@ -703,16 +704,16 @@ class ISACaller:
         # any modified return results?
         if info.write_regs:
             for name, output in zip(output_names, results):
-                if name == 'overflow': # ignore, done already (above)
+                if name == 'overflow':  # ignore, done already (above)
                     continue
                 if isinstance(output, int):
                     output = SelectableInt(output, 256)
                 if name in ['CA', 'CA32']:
                     if carry_en:
-                        print ("writing %s to XER" % name, output)
+                        print("writing %s to XER" % name, output)
                         self.spr['XER'][XER_bits[name]] = output.value
                     else:
-                        print ("NOT writing %s to XER" % name, output)
+                        print("NOT writing %s to XER" % name, output)
                 elif name in info.special_regs:
                     print('writing special %s' % name, output, special_sprs)
                     if name in special_sprs:
@@ -720,7 +721,7 @@ class ISACaller:
                     else:
                         self.namespace[name].eq(output)
                     if name == 'MSR':
-                        print ('msr written', hex(self.msr.value))
+                        print('msr written', hex(self.msr.value))
                 else:
                     regnum = yield getattr(self.decoder, name)
                     print('writing reg %d %s' % (regnum, str(output)))
@@ -728,7 +729,7 @@ class ISACaller:
                         output = SelectableInt(output.value, 64)
                     self.gpr[regnum] = output
 
-        print ("end of call", self.namespace['CIA'], self.namespace['NIA'])
+        print("end of call", self.namespace['CIA'], self.namespace['NIA'])
         # UPDATE program counter
         self.pc.update(self.namespace)
 
@@ -753,17 +754,17 @@ def inject():
             except AttributeError:
                 func_globals = func.func_globals  # Earlier versions.
 
-            context = args[0].namespace # variables to be injected
+            context = args[0].namespace  # variables to be injected
             saved_values = func_globals.copy()  # Shallow copy of dict.
             func_globals.update(context)
             result = func(*args, **kwargs)
-            print ("globals after", func_globals['CIA'], func_globals['NIA'])
-            print ("args[0]", args[0].namespace['CIA'],
-                              args[0].namespace['NIA'])
+            print("globals after", func_globals['CIA'], func_globals['NIA'])
+            print("args[0]", args[0].namespace['CIA'],
+                  args[0].namespace['NIA'])
             args[0].namespace = func_globals
             #exec (func.__code__, func_globals)
 
-            #finally:
+            # finally:
             #    func_globals = saved_values  # Undo changes.
 
             return result
@@ -771,4 +772,3 @@ def inject():
         return decorator
 
     return variable_injector
-
diff --git a/src/soc/decoder/power_decoder.py b/src/soc/decoder/power_decoder.py
index b0ad17b2..98912b0d 100644
--- a/src/soc/decoder/power_decoder.py
+++ b/src/soc/decoder/power_decoder.py
@@ -95,14 +95,15 @@ from soc.decoder.power_fieldsn import SigDecode, SignalBitRange
 # key data structure in which the POWER decoder is specified,
 # in a hierarchical fashion
 Subdecoder = namedtuple("Subdecoder",
-        ["pattern",    # the major pattern to search for (e.g. major opcode)
-         "opcodes",    # a dictionary of minor patterns to find
-         "opint",      # true => the pattern must not be in "10----11" format
-         "bitsel",     # the bits (as a range) against which "pattern" matches
-         "suffix",     # shift the opcode down before decoding
-         "subdecoders" # list of further subdecoders for *additional* matches,
-                       # *ONLY* after "pattern" has *ALSO* been matched against.
-        ])
+                        ["pattern",    # the major pattern to search for (e.g. major opcode)
+                         "opcodes",    # a dictionary of minor patterns to find
+                         "opint",      # true => the pattern must not be in "10----11" format
+                         # the bits (as a range) against which "pattern" matches
+                         "bitsel",
+                         "suffix",     # shift the opcode down before decoding
+                         "subdecoders"  # list of further subdecoders for *additional* matches,
+                         # *ONLY* after "pattern" has *ALSO* been matched against.
+                         ])
 
 
 class PowerOp:
@@ -119,7 +120,7 @@ class PowerOp:
         self.function_unit = Signal(Function, reset_less=True)
         self.internal_op = Signal(MicrOp, reset_less=True)
         self.form = Signal(Form, reset_less=True)
-        if incl_asm: # for simulator only
+        if incl_asm:  # for simulator only
             self.asmcode = Signal(8, reset_less=True)
         self.in1_sel = Signal(In1Sel, reset_less=True)
         self.in2_sel = Signal(In2Sel, reset_less=True)
@@ -141,12 +142,14 @@ class PowerOp:
         # TODO: this conversion process from a dict to an object
         # should really be done using e.g. namedtuple and then
         # call eq not _eq
-        if False: # debugging
+        if False:  # debugging
             if row['CR in'] == '1':
-                import pdb; pdb.set_trace()
+                import pdb
+                pdb.set_trace()
                 print(row)
             if row['CR out'] == '0':
-                import pdb; pdb.set_trace()
+                import pdb
+                pdb.set_trace()
                 print(row)
             print(row)
         ldst_mode = row['upd']
@@ -169,7 +172,7 @@ class PowerOp:
                self.cry_in.eq(CryIn[row['cry in']]),
                ]
         if False:
-            print (row.keys())
+            print(row.keys())
         asmcode = row['comment']
         if hasattr(self, "asmcode") and asmcode in asmidx:
             res.append(self.asmcode.eq(asmidx[asmcode]))
@@ -302,7 +305,7 @@ class PowerDecoder(Elaboratable):
     def handle_subdecoders(self, m, d):
         for dec in d.subdecoders:
             subdecoder = PowerDecoder(self.width, dec)
-            if isinstance(dec, list): # XXX HACK: take first pattern
+            if isinstance(dec, list):  # XXX HACK: take first pattern
                 dec = dec[0]
             setattr(m.submodules, "dec%d" % dec.pattern, subdecoder)
             m.d.comb += subdecoder.opcode_in.eq(self.opcode_in)
@@ -390,9 +393,9 @@ def create_pdecode():
     # minor 19 has extra patterns
     m19 = []
     m19.append(Subdecoder(pattern=19, opcodes=get_csv("minor_19.csv"),
-                   opint=True, bitsel=(1, 11), suffix=None, subdecoders=[]))
+                          opint=True, bitsel=(1, 11), suffix=None, subdecoders=[]))
     m19.append(Subdecoder(pattern=19, opcodes=get_csv("minor_19_00000.csv"),
-                   opint=True, bitsel=(1, 6), suffix=None, subdecoders=[]))
+                          opint=True, bitsel=(1, 6), suffix=None, subdecoders=[]))
 
     # minor opcodes.
     pminor = [
@@ -411,10 +414,10 @@ def create_pdecode():
     dec = []
     opcodes = get_csv("major.csv")
     dec.append(Subdecoder(pattern=None, opint=True, opcodes=opcodes,
-                     bitsel=(26, 32), suffix=None, subdecoders=pminor))
+                          bitsel=(26, 32), suffix=None, subdecoders=pminor))
     opcodes = get_csv("extra.csv")
     dec.append(Subdecoder(pattern=None, opint=False, opcodes=opcodes,
-                     bitsel=(0, 32), suffix=None, subdecoders=[]))
+                          bitsel=(0, 32), suffix=None, subdecoders=[]))
 
     return TopPowerDecoder(32, dec)
 
diff --git a/src/soc/decoder/power_decoder2.py b/src/soc/decoder/power_decoder2.py
index 7166240f..0b12fe25 100644
--- a/src/soc/decoder/power_decoder2.py
+++ b/src/soc/decoder/power_decoder2.py
@@ -42,7 +42,7 @@ def instr_is_priv(m, op, insn):
         # XXX TODO
         #with m.Case(MicrOp.OP_TLBIE) : comb += is_priv_insn.eq(1)
         with m.Case(MicrOp.OP_MFSPR, MicrOp.OP_MTSPR):
-            with m.If(insn[20]): # field XFX.spr[-1] i think
+            with m.If(insn[20]):  # field XFX.spr[-1] i think
                 comb += is_priv_insn.eq(1)
     return is_priv_insn
 
@@ -50,6 +50,7 @@ def instr_is_priv(m, op, insn):
 class SPRMap(Elaboratable):
     """SPRMap: maps POWER9 SPR numbers to internal enum values
     """
+
     def __init__(self):
         self.spr_i = Signal(10, reset_less=True)
         self.spr_o = Signal(SPR, reset_less=True)
@@ -109,20 +110,22 @@ class DecodeA(Elaboratable):
 
             # BC or BCREG: implicit register (CTR) NOTE: same in DecodeOut
             with m.Case(MicrOp.OP_BC):
-                with m.If(~self.dec.BO[2]): # 3.0B p38 BO2=0, use CTR reg
-                    comb += self.fast_out.data.eq(FastRegs.CTR) # constant: CTR
+                with m.If(~self.dec.BO[2]):  # 3.0B p38 BO2=0, use CTR reg
+                    # constant: CTR
+                    comb += self.fast_out.data.eq(FastRegs.CTR)
                     comb += self.fast_out.ok.eq(1)
             with m.Case(MicrOp.OP_BCREG):
-                xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO
-                xo5 = self.dec.FormXL.XO[5] # 3.0B p38
+                xo9 = self.dec.FormXL.XO[9]  # 3.0B p38 top bit of XO
+                xo5 = self.dec.FormXL.XO[5]  # 3.0B p38
                 with m.If(xo9 & ~xo5):
-                    comb += self.fast_out.data.eq(FastRegs.CTR) # constant: CTR
+                    # constant: CTR
+                    comb += self.fast_out.data.eq(FastRegs.CTR)
                     comb += self.fast_out.ok.eq(1)
 
             # MFSPR move from SPRs
             with m.Case(MicrOp.OP_MFSPR):
                 spr = Signal(10, reset_less=True)
-                comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX
+                comb += spr.eq(decode_spr_num(self.dec.SPR))  # from XFX
                 with m.Switch(spr):
                     # fast SPRs
                     with m.Case(SPR.CTR.value):
@@ -141,7 +144,7 @@ class DecodeA(Elaboratable):
                         comb += self.fast_out.data.eq(FastRegs.SRR1)
                         comb += self.fast_out.ok.eq(1)
                     with m.Case(SPR.XER.value):
-                        pass # do nothing
+                        pass  # do nothing
                     # : map to internal SPR numbers
                     # XXX TODO: dec and tb not to go through mapping.
                     with m.Default():
@@ -179,34 +182,35 @@ class DecodeB(Elaboratable):
                 comb += self.reg_out.data.eq(self.dec.RB)
                 comb += self.reg_out.ok.eq(1)
             with m.Case(In2Sel.RS):
-                comb += self.reg_out.data.eq(self.dec.RS) # for M-Form shiftrot
+                # for M-Form shiftrot
+                comb += self.reg_out.data.eq(self.dec.RS)
                 comb += self.reg_out.ok.eq(1)
             with m.Case(In2Sel.CONST_UI):
                 comb += self.imm_out.data.eq(self.dec.UI)
                 comb += self.imm_out.ok.eq(1)
-            with m.Case(In2Sel.CONST_SI): # TODO: sign-extend here?
+            with m.Case(In2Sel.CONST_SI):  # TODO: sign-extend here?
                 comb += self.imm_out.data.eq(
                     exts(self.dec.SI, 16, 64))
                 comb += self.imm_out.ok.eq(1)
             with m.Case(In2Sel.CONST_UI_HI):
-                comb += self.imm_out.data.eq(self.dec.UI<<16)
+                comb += self.imm_out.data.eq(self.dec.UI << 16)
                 comb += self.imm_out.ok.eq(1)
-            with m.Case(In2Sel.CONST_SI_HI): # TODO: sign-extend here?
-                comb += self.imm_out.data.eq(self.dec.SI<<16)
+            with m.Case(In2Sel.CONST_SI_HI):  # TODO: sign-extend here?
+                comb += self.imm_out.data.eq(self.dec.SI << 16)
                 comb += self.imm_out.data.eq(
                     exts(self.dec.SI << 16, 32, 64))
                 comb += self.imm_out.ok.eq(1)
             with m.Case(In2Sel.CONST_LI):
-                comb += self.imm_out.data.eq(self.dec.LI<<2)
+                comb += self.imm_out.data.eq(self.dec.LI << 2)
                 comb += self.imm_out.ok.eq(1)
             with m.Case(In2Sel.CONST_BD):
-                comb += self.imm_out.data.eq(self.dec.BD<<2)
+                comb += self.imm_out.data.eq(self.dec.BD << 2)
                 comb += self.imm_out.ok.eq(1)
             with m.Case(In2Sel.CONST_DS):
-                comb += self.imm_out.data.eq(self.dec.DS<<2)
+                comb += self.imm_out.data.eq(self.dec.DS << 2)
                 comb += self.imm_out.ok.eq(1)
             with m.Case(In2Sel.CONST_M1):
-                comb += self.imm_out.data.eq(~Const(0, 64)) # all 1s
+                comb += self.imm_out.data.eq(~Const(0, 64))  # all 1s
                 comb += self.imm_out.ok.eq(1)
             with m.Case(In2Sel.CONST_SH):
                 comb += self.imm_out.data.eq(self.dec.sh)
@@ -220,8 +224,8 @@ class DecodeB(Elaboratable):
         # BCREG implicitly uses LR or TAR for 2nd reg
         # CTR however is already in fast_spr1 *not* 2.
         with m.If(op.internal_op == MicrOp.OP_BCREG):
-            xo9 = self.dec.FormXL.XO[9] # 3.0B p38 top bit of XO
-            xo5 = self.dec.FormXL.XO[5] # 3.0B p38
+            xo9 = self.dec.FormXL.XO[9]  # 3.0B p38 top bit of XO
+            xo5 = self.dec.FormXL.XO[5]  # 3.0B p38
             with m.If(~xo9):
                 comb += self.fast_out.data.eq(FastRegs.LR)
                 comb += self.fast_out.ok.eq(1)
@@ -251,7 +255,8 @@ class DecodeC(Elaboratable):
         # select Register C field
         with m.Switch(self.sel_in):
             with m.Case(In3Sel.RB):
-                comb += self.reg_out.data.eq(self.dec.RB) # for M-Form shiftrot
+                # for M-Form shiftrot
+                comb += self.reg_out.data.eq(self.dec.RB)
                 comb += self.reg_out.ok.eq(1)
             with m.Case(In3Sel.RS):
                 comb += self.reg_out.data.eq(self.dec.RS)
@@ -290,7 +295,7 @@ class DecodeOut(Elaboratable):
                 comb += self.reg_out.ok.eq(1)
             with m.Case(OutSel.SPR):
                 spr = Signal(10, reset_less=True)
-                comb += spr.eq(decode_spr_num(self.dec.SPR)) # from XFX
+                comb += spr.eq(decode_spr_num(self.dec.SPR))  # from XFX
                 # TODO MTSPR 1st spr (fast)
                 with m.If(op.internal_op == MicrOp.OP_MTSPR):
                     with m.Switch(spr):
@@ -311,7 +316,7 @@ class DecodeOut(Elaboratable):
                             comb += self.fast_out.data.eq(FastRegs.SRR1)
                             comb += self.fast_out.ok.eq(1)
                         with m.Case(SPR.XER.value):
-                            pass # do nothing
+                            pass  # do nothing
                         # : map to internal SPR numbers
                         # XXX TODO: dec and tb not to go through mapping.
                         with m.Default():
@@ -323,13 +328,14 @@ class DecodeOut(Elaboratable):
 
             # BC or BCREG: implicit register (CTR) NOTE: same in DecodeA
             with m.Case(MicrOp.OP_BC, MicrOp.OP_BCREG):
-                with m.If(~self.dec.BO[2]): # 3.0B p38 BO2=0, use CTR reg
-                    comb += self.fast_out.data.eq(FastRegs.CTR) # constant: CTR
+                with m.If(~self.dec.BO[2]):  # 3.0B p38 BO2=0, use CTR reg
+                    # constant: CTR
+                    comb += self.fast_out.data.eq(FastRegs.CTR)
                     comb += self.fast_out.ok.eq(1)
 
             # RFID 1st spr (fast)
             with m.Case(MicrOp.OP_RFID):
-                comb += self.fast_out.data.eq(FastRegs.SRR0) # constant: SRR0
+                comb += self.fast_out.data.eq(FastRegs.SRR0)  # constant: SRR0
                 comb += self.fast_out.ok.eq(1)
 
         return m
@@ -365,13 +371,13 @@ class DecodeOut2(Elaboratable):
 
             # BC* implicit register (LR)
             with m.Case(MicrOp.OP_BC, MicrOp.OP_B, MicrOp.OP_BCREG):
-                with m.If(self.lk): # "link" mode
-                    comb += self.fast_out.data.eq(FastRegs.LR) # constant: LR
+                with m.If(self.lk):  # "link" mode
+                    comb += self.fast_out.data.eq(FastRegs.LR)  # constant: LR
                     comb += self.fast_out.ok.eq(1)
 
             # RFID 2nd spr (fast)
             with m.Case(MicrOp.OP_RFID):
-                comb += self.fast_out.data.eq(FastRegs.SRR1) # constant: SRR1
+                comb += self.fast_out.data.eq(FastRegs.SRR1)  # constant: SRR1
                 comb += self.fast_out.ok.eq(1)
 
         return m
@@ -382,6 +388,7 @@ class DecodeRC(Elaboratable):
 
     decodes Record bit Rc
     """
+
     def __init__(self, dec):
         self.dec = dec
         self.sel_in = Signal(RC, reset_less=True)
@@ -418,6 +425,7 @@ class DecodeOE(Elaboratable):
     -- actual POWER9 does if we set it on those instructions, for now we
     -- test that further down when assigning to the multiplier oe input.
     """
+
     def __init__(self, dec):
         self.dec = dec
         self.sel_in = Signal(RC, reset_less=True)
@@ -445,6 +453,7 @@ class DecodeOE(Elaboratable):
 
         return m
 
+
 class DecodeCRIn(Elaboratable):
     """Decodes input CR from instruction
 
@@ -470,7 +479,7 @@ class DecodeCRIn(Elaboratable):
         comb += self.whole_reg.eq(0)
         with m.Switch(self.sel_in):
             with m.Case(CRInSel.NONE):
-                pass # No bitfield activated
+                pass  # No bitfield activated
             with m.Case(CRInSel.CR0):
                 comb += self.cr_bitfield.data.eq(0)
                 comb += self.cr_bitfield.ok.eq(1)
@@ -519,10 +528,10 @@ class DecodeCROut(Elaboratable):
         comb += self.whole_reg.eq(0)
         with m.Switch(self.sel_in):
             with m.Case(CROutSel.NONE):
-                pass # No bitfield activated
+                pass  # No bitfield activated
             with m.Case(CROutSel.CR0):
                 comb += self.cr_bitfield.data.eq(0)
-                comb += self.cr_bitfield.ok.eq(self.rc_in) # only when RC=1
+                comb += self.cr_bitfield.ok.eq(self.rc_in)  # only when RC=1
             with m.Case(CROutSel.BF):
                 comb += self.cr_bitfield.data.eq(self.dec.FormX.BF)
                 comb += self.cr_bitfield.ok.eq(1)
@@ -559,11 +568,11 @@ class PowerDecode2(Elaboratable):
 
         self.dec = dec
         self.e = Decode2ToExecute1Type()
-        self.valid = Signal() # sync signal
+        self.valid = Signal()  # sync signal
 
         # state information needed by the Decoder (TODO: this as a Record)
-        self.msr = Signal(64, reset_less=True) # copy of MSR
-        self.cia = Signal(64, reset_less=True) # copy of Program Counter
+        self.msr = Signal(64, reset_less=True)  # copy of MSR
+        self.cia = Signal(64, reset_less=True)  # copy of Program Counter
 
     def ports(self):
         return self.dec.ports() + self.e.ports()
@@ -599,7 +608,7 @@ class PowerDecode2(Elaboratable):
         comb += dec_o2.sel_in.eq(op.out_sel)
         comb += dec_o2.lk.eq(do.lk)
         comb += dec_rc.sel_in.eq(op.rc_sel)
-        comb += dec_oe.sel_in.eq(op.rc_sel) # XXX should be OE sel
+        comb += dec_oe.sel_in.eq(op.rc_sel)  # XXX should be OE sel
         comb += dec_cr_in.sel_in.eq(op.cr_in)
         comb += dec_cr_out.sel_in.eq(op.cr_out)
         comb += dec_cr_out.rc_in.eq(dec_rc.rc_out.data)
@@ -609,7 +618,8 @@ class PowerDecode2(Elaboratable):
         comb += do.cia.eq(self.cia)
 
         # set up instruction, pick fn unit
-        comb += do.insn_type.eq(op.internal_op) # no op: defaults to OP_ILLEGAL
+        # no op: defaults to OP_ILLEGAL
+        comb += do.insn_type.eq(op.internal_op)
         comb += do.fn_unit.eq(op.function_unit)
 
         # registers a, b, c and out and out2 (LD/ST EA)
@@ -618,7 +628,7 @@ class PowerDecode2(Elaboratable):
         comb += e.read_reg3.eq(dec_c.reg_out)
         comb += e.write_reg.eq(dec_o.reg_out)
         comb += e.write_ea.eq(dec_o2.reg_out)
-        comb += do.imm_data.eq(dec_b.imm_out) # immediate in RB (usually)
+        comb += do.imm_data.eq(dec_b.imm_out)  # immediate in RB (usually)
         comb += do.zero_a.eq(dec_a.immz_out)  # RA==0 detected
 
         # rc and oe out
@@ -650,19 +660,19 @@ class PowerDecode2(Elaboratable):
         comb += do.invert_a.eq(op.inv_a)
         comb += do.invert_out.eq(op.inv_out)
         comb += do.input_carry.eq(op.cry_in)   # carry comes in
-        comb += do.output_carry.eq(op.cry_out) # carry goes out
+        comb += do.output_carry.eq(op.cry_out)  # carry goes out
         comb += do.is_32bit.eq(op.is_32b)
         comb += do.is_signed.eq(op.sgn)
         with m.If(op.lk):
-            comb += do.lk.eq(self.dec.LK) # XXX TODO: accessor
+            comb += do.lk.eq(self.dec.LK)  # XXX TODO: accessor
 
         comb += do.byte_reverse.eq(op.br)
         comb += do.sign_extend.eq(op.sgn_ext)
-        comb += do.ldst_mode.eq(op.upd) # LD/ST mode (update, cache-inhibit)
+        comb += do.ldst_mode.eq(op.upd)  # LD/ST mode (update, cache-inhibit)
 
         # These should be removed eventually
         comb += do.input_cr.eq(op.cr_in)   # condition reg comes in
-        comb += do.output_cr.eq(op.cr_out) # condition reg goes in
+        comb += do.output_cr.eq(op.cr_out)  # condition reg goes in
 
         # sigh this is exactly the sort of thing for which the
         # decoder is designed to not need.  MTSPR, MFSPR and others need
@@ -696,19 +706,19 @@ class PowerDecode2(Elaboratable):
         with m.If((do.insn_type == MicrOp.OP_TRAP) |
                   (do.insn_type == MicrOp.OP_SC)):
             # TRAP write fast1 = SRR0
-            comb += e.write_fast1.data.eq(FastRegs.SRR0) # constant: SRR0
+            comb += e.write_fast1.data.eq(FastRegs.SRR0)  # constant: SRR0
             comb += e.write_fast1.ok.eq(1)
             # TRAP write fast2 = SRR1
-            comb += e.write_fast2.data.eq(FastRegs.SRR1) # constant: SRR1
+            comb += e.write_fast2.data.eq(FastRegs.SRR1)  # constant: SRR1
             comb += e.write_fast2.ok.eq(1)
 
         # RFID: needs to read SRR0/1
         with m.If(do.insn_type == MicrOp.OP_RFID):
             # TRAP read fast1 = SRR0
-            comb += e.read_fast1.data.eq(FastRegs.SRR0) # constant: SRR0
+            comb += e.read_fast1.data.eq(FastRegs.SRR0)  # constant: SRR0
             comb += e.read_fast1.ok.eq(1)
             # TRAP read fast2 = SRR1
-            comb += e.read_fast2.data.eq(FastRegs.SRR1) # constant: SRR1
+            comb += e.read_fast2.data.eq(FastRegs.SRR1)  # constant: SRR1
             comb += e.read_fast2.ok.eq(1)
 
         return m
@@ -718,15 +728,15 @@ class PowerDecode2(Elaboratable):
         """
         comb = m.d.comb
         e, op, do = self.e, self.dec.op, self.e.do
-        comb += e.eq(0) # reset eeeeeverything
+        comb += e.eq(0)  # reset eeeeeverything
         # start again
         comb += do.insn.eq(self.dec.opcode_in)
         comb += do.insn_type.eq(MicrOp.OP_TRAP)
         comb += do.fn_unit.eq(Function.TRAP)
-        comb += do.trapaddr.eq(trapaddr >> 4) # cut bottom 4 bits
-        comb += do.traptype.eq(traptype) # request type
-        comb += do.msr.eq(self.msr) # copy of MSR "state"
-        comb += do.cia.eq(self.cia) # copy of PC "state"
+        comb += do.trapaddr.eq(trapaddr >> 4)  # cut bottom 4 bits
+        comb += do.traptype.eq(traptype)  # request type
+        comb += do.msr.eq(self.msr)  # copy of MSR "state"
+        comb += do.cia.eq(self.cia)  # copy of PC "state"
 
     def regspecmap_read(self, regfile, regname):
         """regspecmap_read: provides PowerDecode2 with an encoding relationship
@@ -748,7 +758,7 @@ class PowerDecode2(Elaboratable):
             regfile, regname, _ = cu.get_in_spec(idx)
             rdflag, read = self.regspecmap_read(regfile, regname)
             rdl.append(rdflag)
-        print ("rdflags", rdl)
+        print("rdflags", rdl)
         return Cat(*rdl)
 
 
@@ -758,4 +768,3 @@ if __name__ == '__main__':
     vl = rtlil.convert(dec2, ports=dec2.ports() + pdecode.ports())
     with open("dec2.il", "w") as f:
         f.write(vl)
-
diff --git a/src/soc/decoder/power_enums.py b/src/soc/decoder/power_enums.py
index a804aede..339b86f3 100644
--- a/src/soc/decoder/power_enums.py
+++ b/src/soc/decoder/power_enums.py
@@ -4,6 +4,7 @@ import os
 from os.path import dirname, join
 from collections import namedtuple
 
+
 def find_wiki_file(name):
     filedir = os.path.dirname(os.path.abspath(__file__))
     basedir = dirname(dirname(dirname(filedir)))
@@ -47,16 +48,16 @@ def get_signal_name(name):
 @unique
 class Function(Enum):
     NONE = 0
-    ALU = 1<<1
-    LDST = 1<<2
-    SHIFT_ROT = 1<<3
-    LOGICAL = 1<<4
-    BRANCH = 1<<5
-    CR = 1<<6
-    TRAP = 1<<7
-    MUL = 1<<8
-    DIV = 1<<9
-    SPR = 1<<10
+    ALU = 1 << 1
+    LDST = 1 << 2
+    SHIFT_ROT = 1 << 3
+    LOGICAL = 1 << 4
+    BRANCH = 1 << 5
+    CR = 1 << 6
+    TRAP = 1 << 7
+    MUL = 1 << 8
+    DIV = 1 << 9
+    SPR = 1 << 10
 
 
 @unique
@@ -91,6 +92,7 @@ class Form(Enum):
     Z22 = 27
     Z23 = 28
 
+
 # supported instructions: make sure to keep up-to-date with CSV files
 # just like everything else
 _insns = [
@@ -231,14 +233,14 @@ class In2Sel(Enum):
     CONST_SH = 10
     CONST_SH32 = 11
     SPR = 12
-    RS = 13 # for shiftrot (M-Form)
+    RS = 13  # for shiftrot (M-Form)
 
 
 @unique
 class In3Sel(Enum):
     NONE = 0
     RS = 1
-    RB = 2 # for shiftrot (M-Form)
+    RB = 2  # for shiftrot (M-Form)
 
 
 @unique
@@ -279,6 +281,7 @@ class CryIn(Enum):
     ONE = 1
     CA = 2
 
+
 @unique
 class CRInSel(Enum):
     NONE = 0
@@ -289,6 +292,7 @@ class CRInSel(Enum):
     BC = 5
     WHOLE_REG = 6
 
+
 @unique
 class CROutSel(Enum):
     NONE = 0
@@ -322,13 +326,13 @@ XER_bits = {
     'CA': 34,
     'OV32': 44,
     'CA32': 45
-    }
+}
 
 if __name__ == '__main__':
     # find out what the heck is in SPR enum :)
-    print ("sprs", len(SPR))
-    print (dir(SPR))
-    print (dir(Enum))
-    print (SPR.__members__['TAR'])
+    print("sprs", len(SPR))
+    print(dir(SPR))
+    print(dir(Enum))
+    print(SPR.__members__['TAR'])
     for x in SPR:
-        print (x, x.value, str(x), x.name)
+        print(x, x.value, str(x), x.name)
diff --git a/src/soc/decoder/power_fields.py b/src/soc/decoder/power_fields.py
index 05182798..f4bdf161 100644
--- a/src/soc/decoder/power_fields.py
+++ b/src/soc/decoder/power_fields.py
@@ -109,7 +109,7 @@ def decode_form(form):
 class DecodeFields:
 
     def __init__(self, bitkls=BitRange, bitargs=(), fname=None,
-                       name_on_wiki=None):
+                 name_on_wiki=None):
         self.bitkls = bitkls
         self.bitargs = bitargs
         if fname is None:
@@ -217,7 +217,8 @@ class DecodeFields:
                     txt = "%s (%s)" % (f0, s0)
                     individualfields.append(txt)
                 if len(fs) > 1:
-                  res.update(self.decode_instruction_fields(individualfields))
+                    res.update(self.decode_instruction_fields(
+                        individualfields))
             d = self.bitkls(*self.bitargs)
             idx = 0
             for s in ss:
@@ -244,6 +245,6 @@ if __name__ == '__main__':
     dec.create_specs()
     forms, instrs = dec.forms, dec.instrs
     for form, fields in instrs.items():
-        print ("Form", form)
+        print("Form", form)
         for field, bits in fields.items():
-            print ("\tfield", field, bits)
+            print("\tfield", field, bits)
diff --git a/src/soc/decoder/power_pseudo.py b/src/soc/decoder/power_pseudo.py
index 2e7b2422..3e02cb78 100644
--- a/src/soc/decoder/power_pseudo.py
+++ b/src/soc/decoder/power_pseudo.py
@@ -211,9 +211,10 @@ def tolist(num):
 def get_reg_hex(reg):
     return hex(reg.value)
 
+
 def convert_to_python(pcode, form, incl_carry):
 
-    print ("form", form)
+    print("form", form)
     gsc = GardenSnakeCompiler(form=form, incl_carry=incl_carry)
 
     tree = gsc.compile(pcode, mode="exec", filename="string")
@@ -222,7 +223,7 @@ def convert_to_python(pcode, form, incl_carry):
                 'write_regs': gsc.parser.write_regs,
                 'uninit_regs': gsc.parser.uninit_regs,
                 'special_regs': gsc.parser.special_regs,
-                'op_fields': gsc.parser.op_fields }
+                'op_fields': gsc.parser.op_fields}
     return astor.to_source(tree), regsused
 
 
@@ -307,8 +308,8 @@ def test():
             # read regs, drop them into dict for function
             for rname in gsc.parser.read_regs:
                 regidx = yield getattr(decode.sigforms['X'], rname)
-                d[rname] = gsc.gpr[regidx] # contents of regfile
-                d["_%s" % rname] = regidx # actual register value
+                d[rname] = gsc.gpr[regidx]  # contents of regfile
+                d["_%s" % rname] = regidx  # actual register value
                 print("read reg", rname, regidx, hex(d[rname].value))
 
             exec(compiled_code, d)  # code gets executed here in dict "d"
@@ -342,7 +343,8 @@ def test():
         for j in range(16):
             hexstr.append("%02x" % gsc.mem.mem[i+j])
         hexstr = ' '.join(hexstr)
-        print ("mem %4x" % i, hexstr)
+        print("mem %4x" % i, hexstr)
+
 
 if __name__ == '__main__':
     test()
diff --git a/src/soc/decoder/pseudo/lexer.py b/src/soc/decoder/pseudo/lexer.py
index c768a450..43aab336 100644
--- a/src/soc/decoder/pseudo/lexer.py
+++ b/src/soc/decoder/pseudo/lexer.py
@@ -12,7 +12,7 @@ from copy import copy
 from ply import lex
 from soc.decoder.selectable_int import SelectableInt
 
-## I implemented INDENT / DEDENT generation as a post-processing filter
+# I implemented INDENT / DEDENT generation as a post-processing filter
 
 # The original lex token stream contains WS and NEWLINE characters.
 # WS will only occur before any other tokens on a line.
@@ -35,6 +35,8 @@ MUST_INDENT = 2
 # identify tokens which tell us whether a "hidden colon" is needed.
 # this in turn means that track_tokens_filter "works" without needing
 # complex grammar rules
+
+
 def python_colonify(lexer, tokens):
 
     implied_colon_needed = False
@@ -111,6 +113,7 @@ def track_tokens_filter(lexer, tokens):
         yield token
         lexer.at_line_start = at_line_start
 
+
 def _new_token(type, lineno):
     tok = lex.LexToken()
     tok.type = type
@@ -120,19 +123,25 @@ def _new_token(type, lineno):
     return tok
 
 # Synthesize a DEDENT tag
+
+
 def DEDENT(lineno):
     return _new_token("DEDENT", lineno)
 
 # Synthesize an INDENT tag
+
+
 def INDENT(lineno):
     return _new_token("INDENT", lineno)
 
+
 def count_spaces(l):
     for i in range(len(l)):
         if l[i] != ' ':
             return i
     return 0
 
+
 def annoying_case_hack_filter(code):
     """add annoying "silent keyword" (fallthrough)
 
@@ -154,14 +163,14 @@ def annoying_case_hack_filter(code):
     for l in code.split("\n"):
         spc_count = count_spaces(l)
         nwhite = l[spc_count:]
-        if len(nwhite) == 0: # skip blank lines
+        if len(nwhite) == 0:  # skip blank lines
             continue
         if nwhite.startswith("case") or nwhite.startswith("default"):
             #print ("case/default", nwhite, spc_count, prev_spc_count)
             if (prev_spc_count is not None and
                 prev_spc_count == spc_count and
-                (res[-1].endswith(":") or res[-1].endswith(": fallthrough"))):
-                res[-1] += " fallthrough" # add to previous line
+                    (res[-1].endswith(":") or res[-1].endswith(": fallthrough"))):
+                res[-1] += " fallthrough"  # add to previous line
             prev_spc_count = spc_count
         else:
             #print ("notstarts", spc_count, nwhite)
@@ -179,11 +188,11 @@ def indentation_filter(tokens):
     prev_was_ws = False
     for token in tokens:
         if 0:
-            print ("Process", depth, token.indent, token,)
+            print("Process", depth, token.indent, token,)
             if token.at_line_start:
-                print ("at_line_start",)
+                print("at_line_start",)
             if token.must_indent:
-                print ("must_indent",)
+                print("must_indent",)
             print
 
         # WS only occurs at the start of the line
@@ -248,7 +257,7 @@ def indentation_filter(tokens):
 
 # The top-level filter adds an ENDMARKER, if requested.
 # Python's grammar uses it.
-def filter(lexer, add_endmarker = True):
+def filter(lexer, add_endmarker=True):
     token = None
     tokens = iter(lexer.token, None)
     tokens = python_colonify(lexer, tokens)
@@ -264,6 +273,7 @@ def filter(lexer, add_endmarker = True):
 
 ##### Lexer ######
 
+
 class PowerLexer:
     tokens = (
         'DEF',
@@ -316,16 +326,16 @@ class PowerLexer:
         'INDENT',
         'DEDENT',
         'ENDMARKER',
-        )
+    )
 
     # Build the lexer
-    def build(self,**kwargs):
-         self.lexer = lex.lex(module=self, **kwargs)
+    def build(self, **kwargs):
+        self.lexer = lex.lex(module=self, **kwargs)
 
     def t_HEX(self, t):
         r"""0x[0-9a-fA-F_]+"""
         val = t.value.replace("_", "")
-        t.value = SelectableInt(int(val, 16), (len(val)-2)*4) # hex = nibble
+        t.value = SelectableInt(int(val, 16), (len(val)-2)*4)  # hex = nibble
         return t
 
     def t_BINARY(self, t):
@@ -342,8 +352,8 @@ class PowerLexer:
 
     def t_STRING(self, t):
         r"'([^\\']+|\\'|\\\\)*'"  # I think this is right ...
-        print (repr(t.value))
-        t.value=t.value[1:-1]
+        print(repr(t.value))
+        t.value = t.value[1:-1]
         return t
 
     t_COLON = r':'
@@ -373,20 +383,20 @@ class PowerLexer:
     # Ply nicely documented how to do this.
 
     RESERVED = {
-      "def": "DEF",
-      "if": "IF",
-      "then": "THEN",
-      "else": "ELSE",
-      "leave": "BREAK",
-      "for": "FOR",
-      "to": "TO",
-      "while": "WHILE",
-      "do": "DO",
-      "return": "RETURN",
-      "switch": "SWITCH",
-      "case": "CASE",
-      "default": "DEFAULT",
-      }
+        "def": "DEF",
+        "if": "IF",
+        "then": "THEN",
+        "else": "ELSE",
+        "leave": "BREAK",
+        "for": "FOR",
+        "to": "TO",
+        "while": "WHILE",
+        "do": "DO",
+        "return": "RETURN",
+        "switch": "SWITCH",
+        "case": "CASE",
+        "default": "DEFAULT",
+    }
 
     def t_NAME(self, t):
         r'[a-zA-Z_][a-zA-Z0-9_]*'
@@ -400,12 +410,12 @@ class PowerLexer:
         r"[ ]*\043[^\n]*"  # \043 is '#'
         pass
 
-
     # Whitespace
+
     def t_WS(self, t):
         r'[ ]+'
         if t.lexer.at_line_start and t.lexer.paren_count == 0 and \
-                                     t.lexer.brack_count == 0:
+                t.lexer.brack_count == 0:
             return t
 
     # Don't generate newline tokens when inside of parenthesis, eg
@@ -444,7 +454,7 @@ class PowerLexer:
 
     def t_error(self, t):
         raise SyntaxError("Unknown symbol %r" % (t.value[0],))
-        print ("Skipping", repr(t.value[0]))
+        print("Skipping", repr(t.value[0]))
         t.lexer.skip(1)
 
 
@@ -454,13 +464,13 @@ class IndentLexer(PowerLexer):
     def __init__(self, debug=0, optimize=0, lextab='lextab', reflags=0):
         self.debug = debug
         self.build(debug=debug, optimize=optimize,
-                                lextab=lextab, reflags=reflags)
+                   lextab=lextab, reflags=reflags)
         self.token_stream = None
 
     def input(self, s, add_endmarker=True):
         s = annoying_case_hack_filter(s)
         if self.debug:
-            print (s)
+            print(s)
         s += "\n"
         self.lexer.paren_count = 0
         self.lexer.brack_count = 0
@@ -473,6 +483,7 @@ class IndentLexer(PowerLexer):
         except StopIteration:
             return None
 
+
 switchtest = """
 switch (n)
     case(1): x <- 5
@@ -503,15 +514,14 @@ if __name__ == '__main__':
     # quick test/demo
     #code = cnttzd
     code = switchtest
-    print (code)
+    print(code)
 
     lexer = IndentLexer(debug=1)
     # Give the lexer some input
-    print ("code")
-    print (code)
+    print("code")
+    print(code)
     lexer.input(code)
 
     tokens = iter(lexer.token, None)
     for token in tokens:
-        print (token)
-
+        print(token)
diff --git a/src/soc/decoder/pseudo/pagereader.py b/src/soc/decoder/pseudo/pagereader.py
index b7e978dd..6b25b54b 100644
--- a/src/soc/decoder/pseudo/pagereader.py
+++ b/src/soc/decoder/pseudo/pagereader.py
@@ -68,7 +68,7 @@ class ISA:
         self.forms = {}
         self.page = {}
         for pth in os.listdir(os.path.join(get_isa_dir())):
-            print (get_isa_dir(), pth)
+            print(get_isa_dir(), pth)
             assert pth.endswith(".mdwn"), "only %s in isa dir" % pth
             self.read_file(pth)
             continue
@@ -85,16 +85,16 @@ class ISA:
             lines = f.readlines()
         rewrite = []
 
-        l = lines.pop(0).rstrip() # get first line
+        l = lines.pop(0).rstrip()  # get first line
         rewrite.append(l)
         while lines:
-            print (l)
+            print(l)
             # expect get heading
             assert l.startswith('#'), ("# not found in line %s" % l)
 
             # whitespace expected
             l = lines.pop(0).strip()
-            print (repr(l))
+            print(repr(l))
             assert len(l) == 0, ("blank line not found %s" % l)
             rewrite.append(l)
 
@@ -112,7 +112,8 @@ class ISA:
             while True:
                 l = lines.pop(0).strip()
                 rewrite.append(l)
-                if len(l) == 0: break
+                if len(l) == 0:
+                    break
                 assert l.startswith('*'), ("* not found in line %s" % l)
 
             rewrite.append("Pseudo-code:")
@@ -121,7 +122,8 @@ class ISA:
             while True:
                 l = lines.pop(0).rstrip()
                 rewrite.append(l)
-                if len(l) == 0: break
+                if len(l) == 0:
+                    break
                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
 
             # "Special Registers Altered" expected
@@ -138,14 +140,16 @@ class ISA:
             while lines:
                 l = lines.pop(0).rstrip()
                 rewrite.append(l)
-                if len(l) == 0: break
+                if len(l) == 0:
+                    break
                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
 
             # expect and drop whitespace
             while lines:
                 l = lines.pop(0).rstrip()
                 rewrite.append(l)
-                if len(l) != 0: break
+                if len(l) != 0:
+                    break
 
         return rewrite
 
@@ -162,16 +166,16 @@ class ISA:
         # line off the list and checks it.  nothing complicated needed,
         # all sections are mandatory so no need for a full LALR parser.
 
-        l = lines.pop(0).rstrip() # get first line
+        l = lines.pop(0).rstrip()  # get first line
         while lines:
-            print (l)
+            print(l)
             # expect get heading
             assert l.startswith('#'), ("# not found in line %s" % l)
             d['desc'] = l[1:].strip()
 
             # whitespace expected
             l = lines.pop(0).strip()
-            print (repr(l))
+            print(repr(l))
             assert len(l) == 0, ("blank line not found %s" % l)
 
             # Form expected
@@ -187,10 +191,11 @@ class ISA:
             li = []
             while True:
                 l = lines.pop(0).strip()
-                if len(l) == 0: break
+                if len(l) == 0:
+                    break
                 assert l.startswith('*'), ("* not found in line %s" % l)
-                l = l[1:].split(' ') # lose star
-                l = filter(lambda x: len(x) != 0, l) # strip blanks
+                l = l[1:].split(' ')  # lose star
+                l = filter(lambda x: len(x) != 0, l)  # strip blanks
                 li.append(list(l))
             opcodes = li
 
@@ -200,16 +205,17 @@ class ISA:
 
             # whitespace expected
             l = lines.pop(0).strip()
-            print (repr(l))
+            print(repr(l))
             assert len(l) == 0, ("blank line not found %s" % l)
 
             # get pseudocode
             li = []
             while True:
                 l = lines.pop(0).rstrip()
-                if len(l) == 0: break
+                if len(l) == 0:
+                    break
                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
-                l = l[4:] # lose 4 spaces
+                l = l[4:]  # lose 4 spaces
                 li.append(l)
             d['pcode'] = li
 
@@ -225,9 +231,10 @@ class ISA:
             li = []
             while lines:
                 l = lines.pop(0).rstrip()
-                if len(l) == 0: break
+                if len(l) == 0:
+                    break
                 assert l.startswith('    '), ("4spcs not found in line %s" % l)
-                l = l[4:] # lose 4 spaces
+                l = l[4:]  # lose 4 spaces
                 li.append(l)
             d['sregs'] = li
 
@@ -238,7 +245,8 @@ class ISA:
             # expect and drop whitespace
             while lines:
                 l = lines.pop(0).rstrip()
-                if len(l) != 0: break
+                if len(l) != 0:
+                    break
 
     def add_op(self, o, d):
         opcode, regs = o[0], o[1:]
@@ -260,14 +268,15 @@ class ISA:
 
     def pprint_ops(self):
         for k, v in self.instr.items():
-            print ("# %s %s" % (v.opcode, v.desc))
-            print ("Form: %s Regs: %s" % (v.form, v.regs))
-            print ('\n'.join(map(lambda x: "    %s" % x, v.pcode)))
-            print ("Specials")
-            print ('\n'.join(map(lambda x: "    %s" % x, v.sregs)))
-            print ()
+            print("# %s %s" % (v.opcode, v.desc))
+            print("Form: %s Regs: %s" % (v.form, v.regs))
+            print('\n'.join(map(lambda x: "    %s" % x, v.pcode)))
+            print("Specials")
+            print('\n'.join(map(lambda x: "    %s" % x, v.sregs)))
+            print()
         for k, v in isa.forms.items():
-            print (k, v)
+            print(k, v)
+
 
 if __name__ == '__main__':
     isa = ISA()
diff --git a/src/soc/decoder/pseudo/parser.py b/src/soc/decoder/pseudo/parser.py
index 957195fb..346e3f98 100644
--- a/src/soc/decoder/pseudo/parser.py
+++ b/src/soc/decoder/pseudo/parser.py
@@ -45,9 +45,9 @@ def Assign(autoassign, assignname, left, right, iea_mode):
     elif isinstance(left, ast.Subscript):
         ls = left.slice
         if (isinstance(ls, ast.Slice) and isinstance(right, ast.Name) and
-            right.id == 'undefined'):
+                right.id == 'undefined'):
             # undefined needs to be copied the exact same slice
-            right =  ast.Subscript(right, ls, ast.Load())
+            right = ast.Subscript(right, ls, ast.Load())
             return ast.Assign([left], right)
         res = ast.Assign([left], right)
         if autoassign and isinstance(ls, ast.Slice):
@@ -57,7 +57,7 @@ def Assign(autoassign, assignname, left, right, iea_mode):
             #       dividend[0:32] = (RA)[0:32]
             # the declaration makes the slice-assignment "work"
             lower, upper, step = ls.lower, ls.upper, ls.step
-            print ("lower, upper, step", repr(lower), repr(upper), step)
+            print("lower, upper, step", repr(lower), repr(upper), step)
             if not isinstance(lower, ast.Constant) or \
                not isinstance(upper, ast.Constant):
                 return res
@@ -271,7 +271,7 @@ class PowerParser:
         self.read_regs = OrderedSet()
         self.uninit_regs = OrderedSet()
         self.write_regs = OrderedSet()
-        self.special_regs = OrderedSet() # see p_atom_name
+        self.special_regs = OrderedSet()  # see p_atom_name
 
     # The grammar comments come from Python's Grammar/Grammar file
 
@@ -621,7 +621,7 @@ class PowerParser:
                     else:
                         fn = 'trunc_rems'
                     # return "function trunc_xxx(l, r)"
-                    p[0] =  ast.Call(ast.Name(fn, ast.Load()), (l, r), [])
+                    p[0] = ast.Call(ast.Name(fn, ast.Load()), (l, r), [])
                 else:
                     # return "l {binop} r"
                     p[0] = ast.BinOp(p[1], binary_ops[p[2]], p[3])
@@ -672,7 +672,7 @@ class PowerParser:
                 self.write_regs.add(name)
         if name in ['CR', 'LR', 'CTR', 'TAR', 'FPSCR', 'MSR']:
             self.special_regs.add(name)
-            self.write_regs.add(name) # and add to list to write
+            self.write_regs.add(name)  # and add to list to write
         p[0] = ast.Name(id=name, ctx=ast.Load())
 
     def p_atom_number(self, p):
diff --git a/src/soc/decoder/pseudo/pywriter.py b/src/soc/decoder/pseudo/pywriter.py
index f06d996f..56a1a2e6 100644
--- a/src/soc/decoder/pseudo/pywriter.py
+++ b/src/soc/decoder/pseudo/pywriter.py
@@ -9,6 +9,7 @@ from soc.decoder.power_pseudo import convert_to_python
 from soc.decoder.orderedset import OrderedSet
 from soc.decoder.isa.caller import create_args
 
+
 def get_isasrc_dir():
     fdir = os.path.abspath(os.path.dirname(__file__))
     fdir = os.path.split(fdir)[0]
@@ -37,6 +38,7 @@ iinfo_template = """instruction_info(func=%s,
                 form='%s',
                 asmregs=%s)"""
 
+
 class PyISAWriter(ISA):
     def __init__(self):
         ISA.__init__(self)
@@ -49,13 +51,13 @@ class PyISAWriter(ISA):
         fname = os.path.join(isadir, "%s.py" % pagename)
         with open(fname, "w") as f:
             iinf = ''
-            f.write(header % pagename) # write out header
+            f.write(header % pagename)  # write out header
             # go through all instructions
             for page in instrs:
                 d = self.instr[page]
-                print ("page", pagename, page, fname, d.opcode)
+                print("page", pagename, page, fname, d.opcode)
                 pcode = '\n'.join(d.pcode) + '\n'
-                print (pcode)
+                print(pcode)
                 incl_carry = pagename == 'fixedshift'
                 pycode, rused = convert_to_python(pcode, d.form, incl_carry)
                 # create list of arguments to call
@@ -66,7 +68,7 @@ class PyISAWriter(ISA):
                 retargs = ', '.join(create_args(rused['write_regs']))
                 # write out function.  pre-pend "op_" because some instrs are
                 # also python keywords (cmp).  also replace "." with "_"
-                op_fname ="op_%s" % page.replace(".", "_")
+                op_fname = "op_%s" % page.replace(".", "_")
                 f.write("    @inject()\n")
                 f.write("    def %s(%s):\n" % (op_fname, args))
                 if 'NIA' in pycode:  # HACK - TODO fix
@@ -105,7 +107,6 @@ class PyISAWriter(ISA):
         except:
             pass
 
-
     def write_isa_class(self):
         isadir = get_isasrc_dir()
         fname = os.path.join(isadir, "all.py")
@@ -129,8 +130,8 @@ class PyISAWriter(ISA):
 
 if __name__ == '__main__':
     isa = PyISAWriter()
-    if len(sys.argv) == 1: # quick way to do it
-        print (dir(isa))
+    if len(sys.argv) == 1:  # quick way to do it
+        print(dir(isa))
         sources = isa.page.keys()
     else:
         sources = sys.argv[1:]
diff --git a/src/soc/decoder/selectable_int.py b/src/soc/decoder/selectable_int.py
index 359afc97..4e091ea3 100644
--- a/src/soc/decoder/selectable_int.py
+++ b/src/soc/decoder/selectable_int.py
@@ -18,14 +18,15 @@ def check_extsign(a, b):
 class FieldSelectableInt:
     """FieldSelectableInt: allows bit-range selection onto another target
     """
+
     def __init__(self, si, br):
-        self.si = si # target selectable int
+        self.si = si  # target selectable int
         if isinstance(br, list) or isinstance(br, tuple):
             _br = BitRange()
             for i, v in enumerate(br):
                 _br[i] = v
             br = _br
-        self.br = br # map of indices.
+        self.br = br  # map of indices.
 
     def eq(self, b):
         if isinstance(b, SelectableInt):
@@ -46,11 +47,11 @@ class FieldSelectableInt:
         return self.merge(vi)
 
     def __getitem__(self, key):
-        print ("getitem", key, self.br)
+        print("getitem", key, self.br)
         if isinstance(key, SelectableInt):
             key = key.value
         if isinstance(key, int):
-            key = self.br[key] # don't do POWER 1.3.4 bit-inversion
+            key = self.br[key]  # don't do POWER 1.3.4 bit-inversion
             return self.si[key]
         if isinstance(key, slice):
             key = self.br[key]
@@ -59,7 +60,7 @@ class FieldSelectableInt:
     def __setitem__(self, key, value):
         if isinstance(key, SelectableInt):
             key = key.value
-        key = self.br[key] # don't do POWER 1.3.4 bit-inversion
+        key = self.br[key]  # don't do POWER 1.3.4 bit-inversion
         if isinstance(key, int):
             return self.si.__setitem__(key, value)
         else:
@@ -70,22 +71,31 @@ class FieldSelectableInt:
 
     def __negate__(self):
         return self._op1(negate)
+
     def __invert__(self):
         return self._op1(inv)
+
     def __add__(self, b):
         return self._op(add, b)
+
     def __sub__(self, b):
         return self._op(sub, b)
+
     def __mul__(self, b):
         return self._op(mul, b)
+
     def __div__(self, b):
         return self._op(truediv, b)
+
     def __mod__(self, b):
         return self._op(mod, b)
+
     def __and__(self, b):
         return self._op(and_, b)
+
     def __or__(self, b):
         return self._op(or_, b)
+
     def __xor__(self, b):
         return self._op(xor, b)
 
@@ -115,7 +125,7 @@ class FieldSelectableIntTestCase(unittest.TestCase):
         br[2] = 3
         fs = FieldSelectableInt(a, br)
         c = fs + b
-        print (c)
+        print(c)
         #self.assertEqual(c.value, a.value + b.value)
 
     def test_select(self):
@@ -155,6 +165,7 @@ class SelectableInt:
     is a bit width associated with SelectableInt, slices operate correctly
     including negative start/end points.
     """
+
     def __init__(self, value, bits):
         if isinstance(value, SelectableInt):
             value = value.value
@@ -175,30 +186,39 @@ class SelectableInt:
 
     def __add__(self, b):
         return self._op(add, b)
+
     def __sub__(self, b):
         return self._op(sub, b)
+
     def __mul__(self, b):
-        # different case: mul result needs to fit the total bitsize 
+        # different case: mul result needs to fit the total bitsize
         if isinstance(b, int):
             b = SelectableInt(b, self.bits)
-        print ("SelectableInt mul", hex(self.value), hex(b.value),
-                                    self.bits, b.bits)
+        print("SelectableInt mul", hex(self.value), hex(b.value),
+              self.bits, b.bits)
         return SelectableInt(self.value * b.value, self.bits + b.bits)
+
     def __floordiv__(self, b):
         return self._op(floordiv, b)
+
     def __truediv__(self, b):
         return self._op(truediv, b)
+
     def __mod__(self, b):
         return self._op(mod, b)
+
     def __and__(self, b):
         return self._op(and_, b)
+
     def __or__(self, b):
         return self._op(or_, b)
+
     def __xor__(self, b):
         return self._op(xor, b)
+
     def __abs__(self):
-        print ("abs", self.value & (1<<(self.bits-1)))
-        if self.value & (1<<(self.bits-1)) != 0:
+        print("abs", self.value & (1 << (self.bits-1)))
+        if self.value & (1 << (self.bits-1)) != 0:
             return -self
         return self
 
@@ -339,7 +359,7 @@ class SelectableInt:
         assert False
 
     def __eq__(self, other):
-        print ("__eq__", self, other)
+        print("__eq__", self, other)
         if isinstance(other, FieldSelectableInt):
             other = other.get_range()
         if isinstance(other, SelectableInt):
@@ -359,7 +379,7 @@ class SelectableInt:
 
     def __repr__(self):
         return "SelectableInt(value=0x{:x}, bits={})".format(self.value,
-                                                           self.bits)
+                                                             self.bits)
 
     def __len__(self):
         return self.bits
@@ -371,6 +391,7 @@ class SelectableInt:
 def onebit(bit):
     return SelectableInt(1 if bit else 0, 1)
 
+
 def selectltu(lhs, rhs):
     """ less-than (unsigned)
     """
@@ -378,6 +399,7 @@ def selectltu(lhs, rhs):
         rhs = rhs.value
     return onebit(lhs.value < rhs)
 
+
 def selectgtu(lhs, rhs):
     """ greater-than (unsigned)
     """
@@ -395,7 +417,7 @@ def selectassign(lhs, idx, rhs):
         else:
             lower, upper, step = idx
         toidx = range(lower, upper, step)
-        fromidx = range(0, upper-lower, step) # XXX eurgh...
+        fromidx = range(0, upper-lower, step)  # XXX eurgh...
     else:
         toidx = [idx]
         fromidx = [0]
@@ -406,7 +428,7 @@ def selectassign(lhs, idx, rhs):
 def selectconcat(*args, repeat=1):
     if repeat != 1 and len(args) == 1 and isinstance(args[0], int):
         args = [SelectableInt(args[0], 1)]
-    if repeat != 1: # multiplies the incoming arguments
+    if repeat != 1:  # multiplies the incoming arguments
         tmp = []
         for i in range(repeat):
             tmp += args
@@ -418,7 +440,7 @@ def selectconcat(*args, repeat=1):
         assert isinstance(i, SelectableInt), "can only concat SIs, sorry"
         res.bits += i.bits
         res.value = (res.value << i.bits) | i.value
-    print ("concat", repeat, res)
+    print("concat", repeat, res)
     return res
 
 
@@ -504,5 +526,6 @@ class SelectableIntTestCase(unittest.TestCase):
         self.assertTrue(a != b)
         self.assertFalse(a == b)
 
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/src/soc/decoder/test/test_power_decoder.py b/src/soc/decoder/test/test_power_decoder.py
index 24da6f44..1e5aa8fc 100644
--- a/src/soc/decoder/test/test_power_decoder.py
+++ b/src/soc/decoder/test/test_power_decoder.py
@@ -57,9 +57,9 @@ class DecoderTestCase(FHDLTestCase):
                 if not row['unit']:
                     continue
                 op = row['opcode']
-                if not opint: # HACK: convert 001---10 to 0b00100010
+                if not opint:  # HACK: convert 001---10 to 0b00100010
                     op = "0b" + op.replace('-', '0')
-                print ("opint", opint, row['opcode'], op)
+                print("opint", opint, row['opcode'], op)
                 print(row)
                 yield opcode.eq(0)
                 yield opcode[bitsel[0]:bitsel[1]].eq(int(op, 0))
@@ -133,7 +133,6 @@ class DecoderTestCase(FHDLTestCase):
     def test_minor_62(self):
         self.run_tst((0, 2), "minor_62.csv", minor=(62, (26, 32)))
 
-
     # #def test_minor_31_prefix(self):
     # #    self.run_tst(10, "minor_31.csv", suffix=(5, 10))
 
diff --git a/src/soc/experiment/alu_hier.py b/src/soc/experiment/alu_hier.py
index 90838b26..59bca26e 100644
--- a/src/soc/experiment/alu_hier.py
+++ b/src/soc/experiment/alu_hier.py
@@ -24,14 +24,12 @@ from soc.fu.cr.cr_input_record import CompCROpSubset
 import operator
 
 
-
-
 class Adder(Elaboratable):
     def __init__(self, width):
         self.invert_a = Signal()
-        self.a   = Signal(width)
-        self.b   = Signal(width)
-        self.o   = Signal(width, name="add_o")
+        self.a = Signal(width)
+        self.b = Signal(width)
+        self.o = Signal(width, name="add_o")
 
     def elaborate(self, platform):
         m = Module()
@@ -44,9 +42,9 @@ class Adder(Elaboratable):
 
 class Subtractor(Elaboratable):
     def __init__(self, width):
-        self.a   = Signal(width)
-        self.b   = Signal(width)
-        self.o   = Signal(width, name="sub_o")
+        self.a = Signal(width)
+        self.b = Signal(width)
+        self.o = Signal(width, name="sub_o")
 
     def elaborate(self, platform):
         m = Module()
@@ -56,9 +54,9 @@ class Subtractor(Elaboratable):
 
 class Multiplier(Elaboratable):
     def __init__(self, width):
-        self.a   = Signal(width)
-        self.b   = Signal(width)
-        self.o   = Signal(width, name="mul_o")
+        self.a = Signal(width)
+        self.b = Signal(width)
+        self.o = Signal(width, name="mul_o")
 
     def elaborate(self, platform):
         m = Module()
@@ -69,34 +67,35 @@ class Multiplier(Elaboratable):
 class Shifter(Elaboratable):
     def __init__(self, width):
         self.width = width
-        self.a   = Signal(width)
-        self.b   = Signal(width)
-        self.o   = Signal(width, name="shf_o")
+        self.a = Signal(width)
+        self.b = Signal(width)
+        self.o = Signal(width, name="shf_o")
 
     def elaborate(self, platform):
         m = Module()
         btrunc = Signal(self.width)
-        m.d.comb += btrunc.eq(self.b & Const((1<<self.width)-1))
+        m.d.comb += btrunc.eq(self.b & Const((1 << self.width)-1))
         m.d.comb += self.o.eq(self.a >> btrunc)
         return m
 
+
 class Dummy:
     pass
 
 
 class DummyALU(Elaboratable):
     def __init__(self, width):
-        self.p = Dummy() # make look like nmutil pipeline API
+        self.p = Dummy()  # make look like nmutil pipeline API
         self.p.data_i = Dummy()
         self.p.data_i.ctx = Dummy()
-        self.n = Dummy() # make look like nmutil pipeline API
+        self.n = Dummy()  # make look like nmutil pipeline API
         self.n.data_o = Dummy()
         self.p.valid_i = Signal()
         self.p.ready_o = Signal()
         self.n.ready_i = Signal()
         self.n.valid_o = Signal()
-        self.counter   = Signal(4)
-        self.op  = CompCROpSubset()
+        self.counter = Signal(4)
+        self.op = CompCROpSubset()
         i = []
         i.append(Signal(width, name="i1"))
         i.append(Signal(width, name="i2"))
@@ -116,7 +115,7 @@ class DummyALU(Elaboratable):
     def elaborate(self, platform):
         m = Module()
 
-        go_now = Signal(reset_less=True) # testing no-delay ALU
+        go_now = Signal(reset_less=True)  # testing no-delay ALU
 
         with m.If(self.p.valid_i):
             # input is valid. next check, if we already said "ready" or not
@@ -140,8 +139,8 @@ class DummyALU(Elaboratable):
         with m.If(self.n.ready_i & self.n.valid_o):
             m.d.sync += self.n.valid_o.eq(0)
             # recipient said it was ready: reset back to known-good.
-            m.d.sync += self.counter.eq(0) # reset the counter
-            m.d.sync += self.o.eq(0) # clear the output for tidiness sake
+            m.d.sync += self.counter.eq(0)  # reset the counter
+            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake
 
         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
         with m.If(self.counter > 1):
@@ -162,16 +161,16 @@ class DummyALU(Elaboratable):
 
 class ALU(Elaboratable):
     def __init__(self, width):
-        self.p = Dummy() # make look like nmutil pipeline API
+        self.p = Dummy()  # make look like nmutil pipeline API
         self.p.data_i = Dummy()
         self.p.data_i.ctx = Dummy()
-        self.n = Dummy() # make look like nmutil pipeline API
+        self.n = Dummy()  # make look like nmutil pipeline API
         self.n.data_o = Dummy()
         self.p.valid_i = Signal()
         self.p.ready_o = Signal()
         self.n.ready_i = Signal()
         self.n.valid_o = Signal()
-        self.counter   = Signal(4)
+        self.counter = Signal(4)
         self.op = CompALUOpSubset(name="op")
         i = []
         i.append(Signal(width, name="i1"))
@@ -209,7 +208,7 @@ class ALU(Elaboratable):
         # pass invert (and carry later)
         m.d.comb += add.invert_a.eq(self.op.invert_a)
 
-        go_now = Signal(reset_less=True) # testing no-delay ALU
+        go_now = Signal(reset_less=True)  # testing no-delay ALU
 
         # ALU sequencer is idle when the count is zero
         alu_idle = Signal(reset_less=True)
@@ -291,9 +290,9 @@ class ALU(Elaboratable):
 
 class BranchOp(Elaboratable):
     def __init__(self, width, op):
-        self.a   = Signal(width)
-        self.b   = Signal(width)
-        self.o   = Signal(width)
+        self.a = Signal(width)
+        self.b = Signal(width)
+        self.o = Signal(width)
         self.op = op
 
     def elaborate(self, platform):
@@ -304,17 +303,17 @@ class BranchOp(Elaboratable):
 
 class BranchALU(Elaboratable):
     def __init__(self, width):
-        self.p = Dummy() # make look like nmutil pipeline API
+        self.p = Dummy()  # make look like nmutil pipeline API
         self.p.data_i = Dummy()
         self.p.data_i.ctx = Dummy()
-        self.n = Dummy() # make look like nmutil pipeline API
+        self.n = Dummy()  # make look like nmutil pipeline API
         self.n.data_o = Dummy()
         self.p.valid_i = Signal()
         self.p.ready_o = Signal()
         self.n.ready_i = Signal()
         self.n.valid_o = Signal()
-        self.counter   = Signal(4)
-        self.op  = Signal(2)
+        self.counter = Signal(4)
+        self.op = Signal(2)
         i = []
         i.append(Signal(width, name="i1"))
         i.append(Signal(width, name="i2"))
@@ -341,7 +340,7 @@ class BranchALU(Elaboratable):
                 mod.b.eq(self.b),
             ]
 
-        go_now = Signal(reset_less=True) # testing no-delay ALU
+        go_now = Signal(reset_less=True)  # testing no-delay ALU
         with m.If(self.p.valid_i):
             # input is valid. next check, if we already said "ready" or not
             with m.If(~self.p.ready_o):
@@ -353,7 +352,8 @@ class BranchALU(Elaboratable):
                     for i, mod in enumerate([bgt, blt, beq, bne]):
                         with m.Case(i):
                             m.d.sync += self.o.eq(mod.o)
-                m.d.sync += self.counter.eq(5) # branch to take 5 cycles (fake)
+                # branch to take 5 cycles (fake)
+                m.d.sync += self.counter.eq(5)
                 #m.d.comb += go_now.eq(1)
         with m.Else():
             # input says no longer valid, so drop ready as well.
@@ -367,8 +367,8 @@ class BranchALU(Elaboratable):
         with m.If(self.n.ready_i & self.n.valid_o):
             m.d.sync += self.n.valid_o.eq(0)
             # recipient said it was ready: reset back to known-good.
-            m.d.sync += self.counter.eq(0) # reset the counter
-            m.d.sync += self.o.eq(0) # clear the output for tidiness sake
+            m.d.sync += self.counter.eq(0)  # reset the counter
+            m.d.sync += self.o.eq(0)  # clear the output for tidiness sake
 
         # countdown to 1 (transition from 1 to 0 only on acknowledgement)
         with m.If(self.counter > 1):
@@ -385,6 +385,7 @@ class BranchALU(Elaboratable):
     def ports(self):
         return list(self)
 
+
 def run_op(dut, a, b, op, inv_a=0):
     yield dut.a.eq(a)
     yield dut.b.eq(b)
@@ -418,25 +419,25 @@ def run_op(dut, a, b, op, inv_a=0):
 
 def alu_sim(dut):
     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD)
-    print ("alu_sim add", result)
+    print("alu_sim add", result)
     assert (result == 8)
 
     result = yield from run_op(dut, 2, 3, MicrOp.OP_MUL_L64)
-    print ("alu_sim mul", result)
+    print("alu_sim mul", result)
     assert (result == 6)
 
     result = yield from run_op(dut, 5, 3, MicrOp.OP_ADD, inv_a=1)
-    print ("alu_sim add-inv", result)
+    print("alu_sim add-inv", result)
     assert (result == 65533)
 
     # test zero-delay ALU
     # don't have OP_SUB, so use any other
     result = yield from run_op(dut, 5, 3, MicrOp.OP_NOP)
-    print ("alu_sim sub", result)
+    print("alu_sim sub", result)
     assert (result == 2)
 
     result = yield from run_op(dut, 13, 2, MicrOp.OP_SHR)
-    print ("alu_sim shr", result)
+    print("alu_sim shr", result)
     assert (result == 3)
 
 
@@ -559,4 +560,3 @@ if __name__ == "__main__":
     # vl = rtlil.convert(alu, ports=alu.ports())
     # with open("test_branch_alu.il", "w") as f:
     #     f.write(vl)
-
diff --git a/src/soc/experiment/compalu_multi.py b/src/soc/experiment/compalu_multi.py
index bfe06c59..4565f2f3 100644
--- a/src/soc/experiment/compalu_multi.py
+++ b/src/soc/experiment/compalu_multi.py
@@ -54,6 +54,7 @@ class CompUnitRecord(RegSpec, RecordObject):
 
     see https://libre-soc.org/3d_gpu/architecture/regfile/ section on regspecs
     """
+
     def __init__(self, subkls, rwid, n_src=None, n_dst=None, name=None):
         RegSpec.__init__(self, rwid, n_src, n_dst)
         RecordObject.__init__(self, name)
@@ -63,7 +64,7 @@ class CompUnitRecord(RegSpec, RecordObject):
         # create source operands
         src = []
         for i in range(n_src):
-            j = i + 1 # name numbering to match src1/src2
+            j = i + 1  # name numbering to match src1/src2
             name = "src%d_i" % j
             rw = self._get_srcwid(i)
             sreg = Signal(rw, name=name, reset_less=True)
@@ -74,29 +75,29 @@ class CompUnitRecord(RegSpec, RecordObject):
         # create dest operands
         dst = []
         for i in range(n_dst):
-            j = i + 1 # name numbering to match dest1/2...
+            j = i + 1  # name numbering to match dest1/2...
             name = "dest%d_o" % j
             rw = self._get_dstwid(i)
-            #dreg = Data(rw, name=name) XXX ??? output needs to be a Data type?
+            # dreg = Data(rw, name=name) XXX ??? output needs to be a Data type?
             dreg = Signal(rw, name=name, reset_less=True)
             setattr(self, name, dreg)
             dst.append(dreg)
         self._dest = dst
 
         # operation / data input
-        self.oper_i = subkls(name="oper_i") # operand
+        self.oper_i = subkls(name="oper_i")  # operand
 
         # create read/write and other scoreboard signalling
-        self.rd = go_record(n_src, name="rd") # read in, req out
-        self.wr = go_record(n_dst, name="wr") # write in, req out
-        self.rdmaskn = Signal(n_src, reset_less=True) # read mask
-        self.wrmask = Signal(n_dst, reset_less=True) # write mask
-        self.issue_i = Signal(reset_less=True) # fn issue in
-        self.shadown_i = Signal(reset=1) # shadow function, defaults to ON
-        self.go_die_i = Signal() # go die (reset)
+        self.rd = go_record(n_src, name="rd")  # read in, req out
+        self.wr = go_record(n_dst, name="wr")  # write in, req out
+        self.rdmaskn = Signal(n_src, reset_less=True)  # read mask
+        self.wrmask = Signal(n_dst, reset_less=True)  # write mask
+        self.issue_i = Signal(reset_less=True)  # fn issue in
+        self.shadown_i = Signal(reset=1)  # shadow function, defaults to ON
+        self.go_die_i = Signal()  # go die (reset)
 
         # output (busy/done)
-        self.busy_o = Signal(reset_less=True) # fn busy out
+        self.busy_o = Signal(reset_less=True)  # fn busy out
         self.done_o = Signal(reset_less=True)
 
 
@@ -115,17 +116,17 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         self.opsubsetkls = opsubsetkls
         self.cu = cu = CompUnitRecord(opsubsetkls, rwid, n_src, n_dst)
         n_src, n_dst = self.n_src, self.n_dst = cu._n_src, cu._n_dst
-        print ("n_src %d n_dst %d" % (self.n_src, self.n_dst))
+        print("n_src %d n_dst %d" % (self.n_src, self.n_dst))
 
         # convenience names for src operands
         for i in range(n_src):
-            j = i + 1 # name numbering to match src1/src2
+            j = i + 1  # name numbering to match src1/src2
             name = "src%d_i" % j
             setattr(self, name, getattr(cu, name))
 
         # convenience names for dest operands
         for i in range(n_dst):
-            j = i + 1 # name numbering to match dest1/2...
+            j = i + 1  # name numbering to match dest1/2...
             name = "dest%d_o" % j
             setattr(self, name, getattr(cu, name))
 
@@ -134,10 +135,10 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         self.wr = cu.wr
         self.rdmaskn = cu.rdmaskn
         self.wrmask = cu.wrmask
-        self.go_rd_i = self.rd.go # temporary naming
-        self.go_wr_i = self.wr.go # temporary naming
-        self.rd_rel_o = self.rd.rel # temporary naming
-        self.req_rel_o = self.wr.rel # temporary naming
+        self.go_rd_i = self.rd.go  # temporary naming
+        self.go_wr_i = self.wr.go  # temporary naming
+        self.rd_rel_o = self.rd.rel  # temporary naming
+        self.req_rel_o = self.wr.rel  # temporary naming
         self.issue_i = cu.issue_i
         self.shadown_i = cu.shadown_i
         self.go_die_i = cu.go_die_i
@@ -148,7 +149,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
 
         self.busy_o = cu.busy_o
         self.dest = cu._dest
-        self.data_o = self.dest[0] # Dest out
+        self.data_o = self.dest[0]  # Dest out
         self.done_o = cu.done_o
 
     def _mux_op(self, m, sl, op_is_imm, imm, i):
@@ -161,7 +162,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         # overwrite 1st src-latch with immediate-muxed stuff
         sl[i][0] = src_or_imm
         sl[i][2] = src_sel
-        sl[i][3] = ~op_is_imm # change rd.rel[i] gate condition
+        sl[i][3] = ~op_is_imm  # change rd.rel[i] gate condition
 
     def elaborate(self, platform):
         m = Module()
@@ -177,7 +178,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         # so combine it with go_rd_i.  if all bits are set we're good
         all_rd = Signal(reset_less=True)
         m.d.comb += all_rd.eq(self.busy_o & rok_l.q &
-                    (((~self.rd.rel) | self.rd.go).all()))
+                              (((~self.rd.rel) | self.rd.go).all()))
 
         # generate read-done pulse
         all_rd_dly = Signal(reset_less=True)
@@ -205,21 +206,21 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         # is enough, when combined with when read-phase is done (rst_l.q)
         wr_any = Signal(reset_less=True)
         req_done = Signal(reset_less=True)
-        m.d.comb += self.done_o.eq(self.busy_o & \
+        m.d.comb += self.done_o.eq(self.busy_o &
                                    ~((self.wr.rel & ~self.wrmask).bool()))
         m.d.comb += wr_any.eq(self.wr.go.bool() | prev_wr_go.bool())
-        m.d.comb += req_done.eq(wr_any & ~self.alu.n.ready_i & \
-                ((req_l.q & self.wrmask) == 0))
+        m.d.comb += req_done.eq(wr_any & ~self.alu.n.ready_i &
+                                ((req_l.q & self.wrmask) == 0))
         # argh, complicated hack: if there are no regs to write,
         # instead of waiting for regs that are never going to happen,
         # we indicate "done" when the ALU is "done"
-        with m.If((self.wrmask == 0) & \
-                    self.alu.n.ready_i & self.alu.n.valid_o & self.busy_o):
+        with m.If((self.wrmask == 0) &
+                  self.alu.n.ready_i & self.alu.n.valid_o & self.busy_o):
             m.d.comb += req_done.eq(1)
 
         # shadow/go_die
         reset = Signal(reset_less=True)
-        rst_r = Signal(reset_less=True) # reset latch off
+        rst_r = Signal(reset_less=True)  # reset latch off
         reset_w = Signal(self.n_dst, reset_less=True)
         reset_r = Signal(self.n_src, reset_less=True)
         m.d.comb += reset.eq(req_done | self.go_die_i)
@@ -229,7 +230,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
 
         # read-done,wr-proceed latch
         m.d.comb += rok_l.s.eq(self.issue_i)  # set up when issue starts
-        m.d.sync += rok_l.r.eq(self.alu.n.valid_o & self.busy_o) # ALU done
+        m.d.sync += rok_l.r.eq(self.alu.n.valid_o & self.busy_o)  # ALU done
 
         # wr-done, back-to-start latch
         m.d.comb += rst_l.s.eq(all_rd)     # set when read-phase is fully done
@@ -237,7 +238,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
 
         # opcode latch (not using go_rd_i) - inverted so that busy resets to 0
         m.d.sync += opc_l.s.eq(self.issue_i)       # set on issue
-        m.d.sync += opc_l.r.eq(req_done) # reset on ALU
+        m.d.sync += opc_l.r.eq(req_done)  # reset on ALU
 
         # src operand latch (not using go_wr_i)
         m.d.sync += src_l.s.eq(Repl(self.issue_i, self.n_src))
@@ -260,7 +261,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
             ok = Const(1, 1)
             if isinstance(lro, Record):
                 data_r = Record.like(lro, name=name)
-                print ("wr fields", i, lro, data_r.fields)
+                print("wr fields", i, lro, data_r.fields)
                 # bye-bye abstract interface design..
                 fname = find_ok(data_r.fields)
                 if fname:
@@ -285,9 +286,9 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         # 2nd operand in the input "regspec".  see for example
         # soc.fu.alu.pipe_data.ALUInputData
         sl = []
-        print ("src_i", self.src_i)
+        print("src_i", self.src_i)
         for i in range(self.n_src):
-            sl.append([self.src_i[i], self.get_in(i), src_l.q[i], Const(1,1)])
+            sl.append([self.src_i[i], self.get_in(i), src_l.q[i], Const(1, 1)])
 
         # if the operand subset has "zero_a" we implicitly assume that means
         # src_i[0] is an INT reg type where zero can be multiplexed in, instead.
@@ -332,9 +333,9 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
         # outputs
         # -----
 
-        slg = Cat(*map(lambda x: x[3], sl)) # get req gate conditions
+        slg = Cat(*map(lambda x: x[3], sl))  # get req gate conditions
         # all request signals gated by busy_o.  prevents picker problems
-        m.d.comb += self.busy_o.eq(opc_l.q) # busy out
+        m.d.comb += self.busy_o.eq(opc_l.q)  # busy out
 
         # read-release gated by busy (and read-mask)
         bro = Repl(self.busy_o, self.n_src)
@@ -370,5 +371,3 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable):
 
     def ports(self):
         return list(self)
-
-
diff --git a/src/soc/experiment/compldst_multi.py b/src/soc/experiment/compldst_multi.py
index 2a040917..c23d7131 100644
--- a/src/soc/experiment/compldst_multi.py
+++ b/src/soc/experiment/compldst_multi.py
@@ -100,8 +100,8 @@ class LDSTCompUnitRecord(CompUnitRecord):
         CompUnitRecord.__init__(self, opsubset, rwid,
                                 n_src=3, n_dst=2, name=name)
 
-        self.ad = go_record(1, name="ad") # address go in, req out
-        self.st = go_record(1, name="st") # store go in, req out
+        self.ad = go_record(1, name="ad")  # address go in, req out
+        self.st = go_record(1, name="st")  # store go in, req out
 
         self.addr_exc_o = Signal(reset_less=True)   # address exception
 
@@ -171,7 +171,7 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
     """
 
     def __init__(self, pi=None, rwid=64, awid=48, opsubset=CompLDSTOpSubset,
-                      debugtest=False):
+                 debugtest=False):
         super().__init__(rwid)
         self.awid = awid
         self.pi = pi
@@ -180,17 +180,17 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
 
         # POWER-compliant LD/ST has index and update: *fixed* number of ports
         self.n_src = n_src = 3   # RA, RB, RT/RS
-        self.n_dst = n_dst = 2 # RA, RT/RS
+        self.n_dst = n_dst = 2  # RA, RT/RS
 
         # set up array of src and dest signals
         for i in range(n_src):
-            j = i + 1 # name numbering to match src1/src2
+            j = i + 1  # name numbering to match src1/src2
             name = "src%d_i" % j
             setattr(self, name, getattr(cu, name))
 
         dst = []
         for i in range(n_dst):
-            j = i + 1 # name numbering to match dest1/2...
+            j = i + 1  # name numbering to match dest1/2...
             name = "dest%d_o" % j
             setattr(self, name, getattr(cu, name))
 
@@ -207,13 +207,13 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         # (it really shouldn't be)
         self.data_wid = self.dest[0].shape()
 
-        self.go_rd_i = self.rd.go # temporary naming
-        self.go_wr_i = self.wr.go # temporary naming
-        self.go_ad_i = self.ad.go # temp naming: go address in
+        self.go_rd_i = self.rd.go  # temporary naming
+        self.go_wr_i = self.wr.go  # temporary naming
+        self.go_ad_i = self.ad.go  # temp naming: go address in
         self.go_st_i = self.st.go  # temp naming: go store in
 
-        self.rd_rel_o = self.rd.rel # temporary naming
-        self.req_rel_o = self.wr.rel # temporary naming
+        self.rd_rel_o = self.rd.rel  # temporary naming
+        self.req_rel_o = self.wr.rel  # temporary naming
         self.adr_rel_o = self.ad.rel  # request address (from mem)
         self.sto_rel_o = self.st.rel  # request store (to mem)
 
@@ -225,7 +225,7 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         self.src_i = cu._src_i
 
         self.data_o = Data(self.data_wid, name="o")  # Dest1 out: RT
-        self.addr_o = Data(self.data_wid, name="ea") # Addr out: Update => RA
+        self.addr_o = Data(self.data_wid, name="ea")  # Addr out: Update => RA
         self.addr_exc_o = cu.addr_exc_o
         self.done_o = cu.done_o
         self.busy_o = cu.busy_o
@@ -264,7 +264,7 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         op_is_st = Signal(reset_less=True)
 
         # ALU/LD data output control
-        alu_valid = Signal(reset_less=True) # ALU operands are valid
+        alu_valid = Signal(reset_less=True)  # ALU operands are valid
         alu_ok = Signal(reset_less=True)    # ALU out ok (1 clock delay valid)
         addr_ok = Signal(reset_less=True)   # addr ok (from PortInterface)
         ld_ok = Signal(reset_less=True)     # LD out ok from PortInterface
@@ -286,13 +286,13 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         reset_u = Signal(reset_less=True)             # reset update
         reset_a = Signal(reset_less=True)             # reset adr latch
         reset_i = Signal(reset_less=True)             # issue|die (use a lot)
-        reset_r = Signal(self.n_src, reset_less=True) # reset src
+        reset_r = Signal(self.n_src, reset_less=True)  # reset src
         reset_s = Signal(reset_less=True)             # reset store
 
         comb += reset_i.eq(issue_i | self.go_die_i)       # various
         comb += reset_o.eq(wr_reset | self.go_die_i)      # opcode reset
-        comb += reset_w.eq(self.wr.go[0] | self.go_die_i) # write reg 1
-        comb += reset_u.eq(self.wr.go[1] | self.go_die_i) # update (reg 2)
+        comb += reset_w.eq(self.wr.go[0] | self.go_die_i)  # write reg 1
+        comb += reset_u.eq(self.wr.go[1] | self.go_die_i)  # update (reg 2)
         comb += reset_s.eq(self.go_st_i | self.go_die_i)  # store reset
         comb += reset_r.eq(self.rd.go | Repl(self.go_die_i, self.n_src))
         comb += reset_a.eq(self.go_ad_i | self.go_die_i)
@@ -350,7 +350,7 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         comb += sto_l.r.eq(reset_s | p_st_go)
 
         # reset latch
-        comb += rst_l.s.eq(addr_ok) # start when address is ready
+        comb += rst_l.s.eq(addr_ok)  # start when address is ready
         comb += rst_l.r.eq(issue_i)
 
         # create a latch/register for the operand
@@ -384,11 +384,11 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         m.d.comb += src2_or_imm.eq(Mux(op_is_imm, oper_r.imm_data.imm, srl[1]))
 
         # now do the ALU addr add: one cycle, and say "ready" (next cycle, too)
-        sync += alu_o.eq(src1_or_z + src2_or_imm) # actual EA
+        sync += alu_o.eq(src1_or_z + src2_or_imm)  # actual EA
         sync += alu_ok.eq(alu_valid)             # keep ack in sync with EA
 
         # decode bits of operand (latched)
-        comb += op_is_st.eq(oper_r.insn_type == MicrOp.OP_STORE) # ST
+        comb += op_is_st.eq(oper_r.insn_type == MicrOp.OP_STORE)  # ST
         comb += op_is_ld.eq(oper_r.insn_type == MicrOp.OP_LOAD)  # LD
         op_is_update = oper_r.ldst_mode == LDSTMode.update           # UPDATE
         op_is_cix = oper_r.ldst_mode == LDSTMode.cix           # cache-inhibit
@@ -402,7 +402,7 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
 
         # busy signal
         busy_o = self.busy_o
-        comb += self.busy_o.eq(opc_l.q) # | self.pi.busy_o)  # busy out
+        comb += self.busy_o.eq(opc_l.q)  # | self.pi.busy_o)  # busy out
 
         # 1st operand read-request only when zero not active
         # 2nd operand only needed when immediate is not active
@@ -440,8 +440,8 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         # provide "done" signal: select req_rel for non-LD/ST, adr_rel for LD/ST
         comb += wr_any.eq(self.st.go | p_st_go | self.wr.go[0] | self.wr.go[1])
         comb += wr_reset.eq(rst_l.q & busy_o & self.shadown_i &
-                    ~(self.st.rel | self.wr.rel[0] | self.wr.rel[1]) &
-                     (lod_l.qn | op_is_st))
+                            ~(self.st.rel | self.wr.rel[0] | self.wr.rel[1]) &
+                            (lod_l.qn | op_is_st))
         comb += self.done_o.eq(wr_reset)
 
         ######################
@@ -468,11 +468,12 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
         # connect to LD/ST PortInterface.
         comb += pi.is_ld_i.eq(op_is_ld & busy_o)  # decoded-LD
         comb += pi.is_st_i.eq(op_is_st & busy_o)  # decoded-ST
-        comb += pi.data_len.eq(self.oper_i.data_len) # data_len
+        comb += pi.data_len.eq(self.oper_i.data_len)  # data_len
         # address
         comb += pi.addr.data.eq(addr_r)           # EA from adder
-        comb += pi.addr.ok.eq(alu_ok & (lod_l.q | sto_l.q)) # "do address stuff"
-        comb += self.addr_exc_o.eq(pi.addr_exc_o) # exception occurred
+        comb += pi.addr.ok.eq(alu_ok & (lod_l.q | sto_l.q)
+                              )  # "do address stuff"
+        comb += self.addr_exc_o.eq(pi.addr_exc_o)  # exception occurred
         comb += addr_ok.eq(self.pi.addr_ok_o)  # no exc, address fine
 
         # byte-reverse on LD - yes this is inverted
@@ -482,13 +483,13 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
             # byte-reverse the data based on ld/st width (turn it to LE)
             data_len = self.oper_i.data_len
             lddata_r = byte_reverse(m, 'lddata_r', pi.ld.data, data_len)
-            comb += ldd_o.eq(lddata_r) # put reversed- data out
+            comb += ldd_o.eq(lddata_r)  # put reversed- data out
         # ld - ld gets latched in via lod_l
-        comb += ld_ok.eq(pi.ld.ok) # ld.ok *closes* (freezes) ld data
+        comb += ld_ok.eq(pi.ld.ok)  # ld.ok *closes* (freezes) ld data
 
         # yes this also looks odd (inverted)
         with m.If(self.oper_i.byte_reverse):
-            comb += pi.st.data.eq(srl[2]) # 3rd operand latch
+            comb += pi.st.data.eq(srl[2])  # 3rd operand latch
         with m.Else():
             # byte-reverse the data based on width
             data_len = self.oper_i.data_len
@@ -505,7 +506,7 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable):
             return self.data_o
         if i == 1:
             return self.addr_o
-        #return self.dest[i]
+        # return self.dest[i]
 
     def get_fu_out(self, i):
         return self.get_out(i)
@@ -548,10 +549,10 @@ def wait_for(sig, wait=True, test1st=False):
 
 
 def store(dut, src1, src2, src3, imm, imm_ok=True, update=False,
-                                            byterev=True):
-    print ("ST", src1, src2, src3, imm, imm_ok, update)
+          byterev=True):
+    print("ST", src1, src2, src3, imm, imm_ok, update)
     yield dut.oper_i.insn_type.eq(MicrOp.OP_STORE)
-    yield dut.oper_i.data_len.eq(2) # half-word
+    yield dut.oper_i.data_len.eq(2)  # half-word
     yield dut.oper_i.byte_reverse.eq(byterev)
     yield dut.src1_i.eq(src1)
     yield dut.src2_i.eq(src2)
@@ -578,17 +579,17 @@ def store(dut, src1, src2, src3, imm, imm_ok=True, update=False,
     yield dut.rd.go.eq(0)
 
     yield from wait_for(dut.adr_rel_o, False, test1st=True)
-    #yield from wait_for(dut.adr_rel_o)
-    #yield dut.ad.go.eq(1)
-    #yield
-    #yield dut.ad.go.eq(0)
+    # yield from wait_for(dut.adr_rel_o)
+    # yield dut.ad.go.eq(1)
+    # yield
+    # yield dut.ad.go.eq(0)
 
     if update:
         yield from wait_for(dut.wr.rel[1])
         yield dut.wr.go.eq(0b10)
         yield
         addr = yield dut.addr_o
-        print ("addr", addr)
+        print("addr", addr)
         yield dut.wr.go.eq(0)
     else:
         addr = None
@@ -598,16 +599,16 @@ def store(dut, src1, src2, src3, imm, imm_ok=True, update=False,
     yield
     yield dut.go_st_i.eq(0)
     yield from wait_for(dut.busy_o, False)
-    #wait_for(dut.stwd_mem_o)
+    # wait_for(dut.stwd_mem_o)
     yield
     return addr
 
 
 def load(dut, src1, src2, imm, imm_ok=True, update=False, zero_a=False,
-                                            byterev=True):
-    print ("LD", src1, src2, imm, imm_ok, update)
+         byterev=True):
+    print("LD", src1, src2, imm, imm_ok, update)
     yield dut.oper_i.insn_type.eq(MicrOp.OP_LOAD)
-    yield dut.oper_i.data_len.eq(2) # half-word
+    yield dut.oper_i.data_len.eq(2)  # half-word
     yield dut.oper_i.byte_reverse.eq(byterev)
     yield dut.src1_i.eq(src1)
     yield dut.src2_i.eq(src2)
@@ -621,9 +622,9 @@ def load(dut, src1, src2, imm, imm_ok=True, update=False, zero_a=False,
 
     # set up read-operand flags
     rd = 0b00
-    if not imm_ok: # no immediate means RB register needs to be read
+    if not imm_ok:  # no immediate means RB register needs to be read
         rd |= 0b10
-    if not zero_a: # no zero-a means RA needs to be read
+    if not zero_a:  # no zero-a means RA needs to be read
         rd |= 0b01
 
     # wait for the operands (RA, RB, or both)
@@ -633,16 +634,16 @@ def load(dut, src1, src2, imm, imm_ok=True, update=False, zero_a=False,
         yield dut.rd.go.eq(0)
 
     yield from wait_for(dut.adr_rel_o, False, test1st=True)
-    #yield dut.ad.go.eq(1)
-    #yield
-    #yield dut.ad.go.eq(0)
+    # yield dut.ad.go.eq(1)
+    # yield
+    # yield dut.ad.go.eq(0)
 
     if update:
         yield from wait_for(dut.wr.rel[1])
         yield dut.wr.go.eq(0b10)
         yield
         addr = yield dut.addr_o
-        print ("addr", addr)
+        print("addr", addr)
         yield dut.wr.go.eq(0)
     else:
         addr = None
@@ -651,7 +652,7 @@ def load(dut, src1, src2, imm, imm_ok=True, update=False, zero_a=False,
     yield dut.wr.go.eq(1)
     yield
     data = yield dut.data_o
-    print (data)
+    print(data)
     yield dut.wr.go.eq(0)
     yield from wait_for(dut.busy_o)
     yield
@@ -671,7 +672,7 @@ def ldst_sim(dut):
     # two LDs (deliberately LD from the 1st address then 2nd)
     data, addr = yield from load(dut, 4, 0, 2)
     assert data == 0x0003, "returned %x" % data
-    data, addr  = yield from load(dut, 2, 0, 2)
+    data, addr = yield from load(dut, 2, 0, 2)
     assert data == 0x0009, "returned %x" % data
     yield
 
@@ -685,12 +686,12 @@ def ldst_sim(dut):
     assert addr == 0x000b, "returned %x" % addr
 
     # update-indexed version
-    data, addr  = yield from load(dut, 9, 5, 0, imm_ok=False, update=True)
+    data, addr = yield from load(dut, 9, 5, 0, imm_ok=False, update=True)
     assert data == 0x0003, "returned %x" % data
     assert addr == 0x000e, "returned %x" % addr
 
     # immediate *and* zero version
-    data, addr  = yield from load(dut, 1, 4, 8, imm_ok=True, zero_a=True)
+    data, addr = yield from load(dut, 1, 4, 8, imm_ok=True, zero_a=True)
     assert data == 0x0008, "returned %x" % data
 
 
@@ -705,7 +706,7 @@ class TestLDSTCompUnit(LDSTCompUnit):
     def elaborate(self, platform):
         m = LDSTCompUnit.elaborate(self, platform)
         m.submodules.l0 = self.l0
-        m.d.comb += self.ad.go.eq(self.ad.rel) # link addr-go direct to rel
+        m.d.comb += self.ad.go.eq(self.ad.rel)  # link addr-go direct to rel
         return m
 
 
@@ -732,7 +733,7 @@ class TestLDSTCompUnitRegSpec(LDSTCompUnit):
     def elaborate(self, platform):
         m = LDSTCompUnit.elaborate(self, platform)
         m.submodules.l0 = self.l0
-        m.d.comb += self.ad.go.eq(self.ad.rel) # link addr-go direct to rel
+        m.d.comb += self.ad.go.eq(self.ad.rel)  # link addr-go direct to rel
         return m
 
 
diff --git a/src/soc/experiment/imem.py b/src/soc/experiment/imem.py
index 6b51ed77..3a9a1bc8 100644
--- a/src/soc/experiment/imem.py
+++ b/src/soc/experiment/imem.py
@@ -7,7 +7,7 @@ from nmigen.cli import rtlil
 class TestMemFetchUnit(FetchUnitInterface, Elaboratable):
 
     def __init__(self, pspec):
-        print ("testmemfetchunit", pspec.addr_wid, pspec.reg_wid)
+        print("testmemfetchunit", pspec.addr_wid, pspec.reg_wid)
         super().__init__(pspec)
         # limit TestMemory to 2^6 entries of regwid size
         self.mem = TestMemory(self.data_wid, 6, readonly=True)
@@ -29,7 +29,7 @@ class TestMemFetchUnit(FetchUnitInterface, Elaboratable):
         # to done.
         op_actioned = Signal(reset=0)
         op_in_progress = Signal(reset=0)
-        with m.If(~op_actioned & do_fetch): # idle
+        with m.If(~op_actioned & do_fetch):  # idle
             m.d.sync += op_actioned.eq(1)
             m.d.sync += op_in_progress.eq(1)
         with m.Elif(op_in_progress):                    # in progress
@@ -44,16 +44,16 @@ class TestMemFetchUnit(FetchUnitInterface, Elaboratable):
 
         return m
 
-    def __iter__(self): # TODO
+    def __iter__(self):  # TODO
         yield self.a_pc_i
         yield self.f_instr_o
 
     def ports(self):
         return list(self)
 
+
 if __name__ == '__main__':
     dut = TestMemFetchUnit(addr_wid=32, data_wid=32)
-    vl = rtlil.convert(dut, ports=[]) # TODOdut.ports())
+    vl = rtlil.convert(dut, ports=[])  # TODOdut.ports())
     with open("test_imem.il", "w") as f:
         f.write(vl)
-
diff --git a/src/soc/experiment/pi2ls.py b/src/soc/experiment/pi2ls.py
index caf68be4..b31386b0 100644
--- a/src/soc/experiment/pi2ls.py
+++ b/src/soc/experiment/pi2ls.py
@@ -35,8 +35,8 @@ from nmutil.latch import SRLatch
 class Pi2LSUI(PortInterfaceBase):
 
     def __init__(self, name, lsui=None,
-                             data_wid=64, mask_wid=8, addr_wid=48):
-        print ("pi2lsui reg mask addr", data_wid, mask_wid, addr_wid)
+                 data_wid=64, mask_wid=8, addr_wid=48):
+        print("pi2lsui reg mask addr", data_wid, mask_wid, addr_wid)
         super().__init__(data_wid, addr_wid)
         if lsui is None:
             lsui = LoadStoreUnitInterface(addr_wid, self.addrbits, data_wid)
@@ -53,7 +53,7 @@ class Pi2LSUI(PortInterfaceBase):
         m.d.comb += self.lsui.x_mask_i.eq(mask)
         m.d.comb += self.lsui.x_addr_i.eq(addr)
 
-    def set_wr_data(self, m, data, wen): # mask already done in addr setup
+    def set_wr_data(self, m, data, wen):  # mask already done in addr setup
         m.d.comb += self.lsui.x_st_data_i.eq(data)
         return ~self.lsui.x_busy_o
 
@@ -76,7 +76,7 @@ class Pi2LSUI(PortInterfaceBase):
         m.d.comb += self.lsui.x_valid_i.eq(self.valid_l.q)
 
         # reset the valid latch when not busy
-        m.d.comb += self.valid_l.r.eq(~pi.busy_o)#self.lsui.x_busy_o)
+        m.d.comb += self.valid_l.r.eq(~pi.busy_o)  # self.lsui.x_busy_o)
 
         return m
 
@@ -84,8 +84,8 @@ class Pi2LSUI(PortInterfaceBase):
 class Pi2LSUI1(Elaboratable):
 
     def __init__(self, name, pi=None, lsui=None,
-                             data_wid=64, mask_wid=8, addr_wid=48):
-        print ("pi2lsui reg mask addr", data_wid, mask_wid, addr_wid)
+                 data_wid=64, mask_wid=8, addr_wid=48):
+        print("pi2lsui reg mask addr", data_wid, mask_wid, addr_wid)
         self.addrbits = mask_wid
         if pi is None:
             piname = "%s_pi" % name
@@ -113,12 +113,12 @@ class Pi2LSUI1(Elaboratable):
 
         m.d.comb += lsui.x_ld_i.eq(pi.is_ld_i)
         m.d.comb += lsui.x_st_i.eq(pi.is_st_i)
-        m.d.comb += pi.busy_o.eq(pi.is_ld_i | pi.is_st_i)#lsui.x_busy_o)
+        m.d.comb += pi.busy_o.eq(pi.is_ld_i | pi.is_st_i)  # lsui.x_busy_o)
 
         lsbaddr, msbaddr = self.splitaddr(pi.addr.data)
         m.d.comb += lenexp.len_i.eq(pi.data_len)
-        m.d.comb += lenexp.addr_i.eq(lsbaddr) # LSBs of addr
-        m.d.comb += lsui.x_addr_i.eq(pi.addr.data) # XXX hmmm...
+        m.d.comb += lenexp.addr_i.eq(lsbaddr)  # LSBs of addr
+        m.d.comb += lsui.x_addr_i.eq(pi.addr.data)  # XXX hmmm...
 
         with m.If(pi.addr.ok):
             # expand the LSBs of address plus LD/ST len into 16-bit mask
@@ -129,7 +129,7 @@ class Pi2LSUI1(Elaboratable):
             m.d.comb += pi.addr_ok_o.eq(1)
 
         with m.If(~lsui.x_busy_o & pi.is_st_i & pi.addr.ok):
-                m.d.sync += st_in_progress.eq(1)
+            m.d.sync += st_in_progress.eq(1)
 
         with m.If(pi.is_ld_i):
             # shift/mask out the loaded data
diff --git a/src/soc/experiment/pimem.py b/src/soc/experiment/pimem.py
index cdc82e17..3626b2e5 100644
--- a/src/soc/experiment/pimem.py
+++ b/src/soc/experiment/pimem.py
@@ -111,7 +111,7 @@ class PortInterface(RecordObject):
         self.st = Data(regwid, "st_data_i")  # ok to be set by CompUnit
 
     def connect_port(self, inport):
-        print ("connect_port", self, inport)
+        print("connect_port", self, inport)
         return [self.is_ld_i.eq(inport.is_ld_i),
                 self.is_st_i.eq(inport.is_st_i),
                 self.data_len.eq(inport.data_len),
@@ -302,7 +302,7 @@ class TestMemoryPortInterface(PortInterfaceBase):
 
     def set_wr_data(self, m, data, wen):
         m.d.comb += self.mem.wrport.data.eq(data)  # write st to mem
-        m.d.comb += self.mem.wrport.en.eq(wen) # enable writes
+        m.d.comb += self.mem.wrport.en.eq(wen)  # enable writes
         return Const(1, 1)
 
     def get_rd_data(self, m):
@@ -319,5 +319,3 @@ class TestMemoryPortInterface(PortInterfaceBase):
     def ports(self):
         yield from super().ports()
         # TODO: memory ports
-
-
diff --git a/src/soc/experiment/score6600.py b/src/soc/experiment/score6600.py
index 8995631f..7b80d76f 100644
--- a/src/soc/experiment/score6600.py
+++ b/src/soc/experiment/score6600.py
@@ -870,7 +870,7 @@ def power_instr_q(dut, pdecode2, ins, code):
 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
             branch_success, branch_fail):
     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
-                'imm_data': (imm, op_imm),
+               'imm_data': (imm, op_imm),
                'read_reg1': src1, 'read_reg2': src2}]
 
     sendlen = 1
@@ -884,11 +884,11 @@ def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
         yield dut.data_i[idx].insn_type.eq(insn_type)
         yield dut.data_i[idx].fn_unit.eq(fn_unit)
         yield dut.data_i[idx].read_reg1.data.eq(reg1)
-        yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
+        yield dut.data_i[idx].read_reg1.ok.eq(1)  # XXX TODO
         yield dut.data_i[idx].read_reg2.data.eq(reg2)
-        yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
+        yield dut.data_i[idx].read_reg2.ok.eq(1)  # XXX TODO
         yield dut.data_i[idx].write_reg.data.eq(dest)
-        yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
+        yield dut.data_i[idx].write_reg.ok.eq(1)  # XXX TODO
         yield dut.data_i[idx].imm_data.data.eq(imm)
         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
         di = yield dut.data_i[idx]
@@ -1107,7 +1107,7 @@ def power_sim(m, dut, pdecode2, instruction, alusim):
         for i in range(1, dut.n_regs):
             #val = randint(0, (1<<alusim.rwidth)-1)
             #val = 31+i*3
-            val = i # XXX actually, not random at all
+            val = i  # XXX actually, not random at all
             yield dut.intregs.regs[i].reg.eq(val)
             alusim.setval(i, val)
 
@@ -1281,7 +1281,7 @@ def scoreboard_sim(dut, alusim):
 
         # issue instruction(s), wait for issue to be free before proceeding
         for i, instr in enumerate(instrs):
-            print (i, instr)
+            print(i, instr)
             src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr
 
             print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
@@ -1334,7 +1334,7 @@ def test_scoreboard():
     run_simulation(m, power_sim(m, dut, pdecode2, instruction, alusim),
                    vcd_name='test_powerboard6600.vcd')
 
-    #run_simulation(dut, scoreboard_sim(dut, alusim),
+    # run_simulation(dut, scoreboard_sim(dut, alusim),
     #               vcd_name='test_scoreboard6600.vcd')
 
     # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
diff --git a/src/soc/experiment/score6600_multi.py b/src/soc/experiment/score6600_multi.py
index 2d3ba001..22d8e2d1 100644
--- a/src/soc/experiment/score6600_multi.py
+++ b/src/soc/experiment/score6600_multi.py
@@ -91,7 +91,7 @@ class CompUnitsBase(Elaboratable):
         self.issue_i = Signal(n_units, reset_less=True)
         self.rd0 = go_record(n_units, "rd0")
         self.rd1 = go_record(n_units, "rd1")
-        self.go_rd_i = [self.rd0.go, self.rd1.go] # XXX HACK!
+        self.go_rd_i = [self.rd0.go, self.rd1.go]  # XXX HACK!
         self.wr0 = go_record(n_units, "wr0")
         self.go_wr_i = [self.wr0.go]
         self.shadown_i = Signal(n_units, reset_less=True)
@@ -102,7 +102,7 @@ class CompUnitsBase(Elaboratable):
 
         # outputs
         self.busy_o = Signal(n_units, reset_less=True)
-        self.rd_rel_o = [self.rd0.rel, self.rd1.rel] # HACK!
+        self.rd_rel_o = [self.rd0.rel, self.rd1.rel]  # HACK!
         self.req_rel_o = self.wr0.rel
         self.done_o = Signal(n_units, reset_less=True)
         if ldstmode:
@@ -143,7 +143,7 @@ class CompUnitsBase(Elaboratable):
             done_l.append(alu.done_o)
             shadow_l.append(alu.shadown_i)
             godie_l.append(alu.go_die_i)
-            print (alu, "rel", alu.req_rel_o, alu.rd_rel_o)
+            print(alu, "rel", alu.req_rel_o, alu.rd_rel_o)
             rd_rel0_l.append(alu.rd_rel_o[0])
             rd_rel1_l.append(alu.rd_rel_o[1])
             go_wr_l.append(alu.go_wr_i)
@@ -158,7 +158,7 @@ class CompUnitsBase(Elaboratable):
         comb += self.busy_o.eq(Cat(*busy_l))
         comb += Cat(*godie_l).eq(self.go_die_i)
         comb += Cat(*shadow_l).eq(self.shadown_i)
-        comb += Cat(*go_wr_l).eq(self.wr0.go) # XXX TODO
+        comb += Cat(*go_wr_l).eq(self.wr0.go)  # XXX TODO
         comb += Cat(*go_rd_l0).eq(self.rd0.go)
         comb += Cat(*go_rd_l1).eq(self.rd1.go)
         comb += Cat(*issue_l).eq(self.issue_i)
@@ -294,7 +294,7 @@ class CompUnitBR(CompUnitsBase):
         self.opwid = opwid
 
         # inputs
-        self.op = CompALUOpSubset("cua_i") # TODO - CompALUBranchSubset
+        self.op = CompALUOpSubset("cua_i")  # TODO - CompALUBranchSubset
         self.oper_i = Signal(opwid, reset_less=True)
         self.imm_i = Signal(rwid, reset_less=True)
 
@@ -310,7 +310,7 @@ class CompUnitBR(CompUnitsBase):
 
         # hand the same operation to all units
         for alu in self.units:
-            #comb += alu.oper_i.eq(self.op) # TODO
+            # comb += alu.oper_i.eq(self.op) # TODO
             comb += alu.oper_i.eq(self.oper_i)
             #comb += alu.imm_i.eq(self.imm_i)
 
@@ -335,30 +335,34 @@ class FunctionUnits(Elaboratable):
         rsel = []
         rd = []
         for i in range(n_src):
-            j = i + 1 # name numbering to match src1/src2
+            j = i + 1  # name numbering to match src1/src2
             src.append(Signal(n_reg, name="src%d" % j, reset_less=True))
-            rsel.append(Signal(n_reg, name="src%d_rsel_o" % j, reset_less=True))
+            rsel.append(Signal(n_reg, name="src%d_rsel_o" %
+                               j, reset_less=True))
             rd.append(Signal(nf, name="gord%d_i" % j, reset_less=True))
         dst = []
         dsel = []
         wr = []
         for i in range(n_dst):
-            j = i + 1 # name numbering to match src1/src2
+            j = i + 1  # name numbering to match src1/src2
             dst.append(Signal(n_reg, name="dst%d" % j, reset_less=True))
-            dsel.append(Signal(n_reg, name="dst%d_rsel_o" % j, reset_less=True))
+            dsel.append(Signal(n_reg, name="dst%d_rsel_o" %
+                               j, reset_less=True))
             wr.append(Signal(nf, name="gowr%d_i" % j, reset_less=True))
         wpnd = []
         pend = []
         for i in range(nf):
-            j = i + 1 # name numbering to match src1/src2
-            pend.append(Signal(nf, name="rd_src%d_pend_o" % j, reset_less=True))
-            wpnd.append(Signal(nf, name="wr_dst%d_pend_o" % j, reset_less=True))
+            j = i + 1  # name numbering to match src1/src2
+            pend.append(Signal(nf, name="rd_src%d_pend_o" %
+                               j, reset_less=True))
+            wpnd.append(Signal(nf, name="wr_dst%d_pend_o" %
+                               j, reset_less=True))
 
         self.dest_i = Array(dst)     # Dest in (top)
         self.src_i = Array(src)      # oper in (top)
 
         # for Register File Select Lines (horizontal), per-reg
-        self.dst_rsel_o = Array(dsel) # dest reg (bot)
+        self.dst_rsel_o = Array(dsel)  # dest reg (bot)
         self.src_rsel_o = Array(rsel)  # src reg (bot)
 
         self.go_rd_i = Array(rd)
@@ -400,13 +404,13 @@ class FunctionUnits(Elaboratable):
 
         # Connect function issue / arrays, and dest/src1/src2
         for i in range(self.n_src):
-            print (i, self.go_rd_i, intfudeps.go_rd_i)
+            print(i, self.go_rd_i, intfudeps.go_rd_i)
             comb += intfudeps.go_rd_i[i].eq(self.go_rd_i[i])
             comb += intregdeps.src_i[i].eq(self.src_i[i])
             comb += intregdeps.go_rd_i[i].eq(self.go_rd_i[i])
             comb += self.src_rsel_o[i].eq(intregdeps.src_rsel_o[i])
         for i in range(self.n_dst):
-            print (i, self.go_wr_i, intfudeps.go_wr_i)
+            print(i, self.go_wr_i, intfudeps.go_wr_i)
             comb += intfudeps.go_wr_i[i].eq(self.go_wr_i[i])
             comb += intregdeps.dest_i[i].eq(self.dest_i[i])
             comb += intregdeps.go_wr_i[i].eq(self.go_wr_i[i])
@@ -860,10 +864,10 @@ class IssueToScoreboard(Elaboratable):
 
             # choose a Function-Unit-Group
             with m.If(fu == Function.ALU):  # alu
-                comb += sc.aluissue.insn_i.eq(1) # enable alu issue
+                comb += sc.aluissue.insn_i.eq(1)  # enable alu issue
                 comb += wait_issue_alu.eq(1)
             with m.Elif(fu == Function.LDST):  # ld/st
-                comb += sc.lsissue.insn_i.eq(1) # enable ldst issue
+                comb += sc.lsissue.insn_i.eq(1)  # enable ldst issue
                 comb += wait_issue_ls.eq(1)
 
             with m.Elif((op & (0x3 << 2)) != 0):  # branch
@@ -912,7 +916,7 @@ def power_instr_q(dut, pdecode2, ins, code):
 def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
             branch_success, branch_fail):
     instrs = [{'insn_type': op, 'fn_unit': funit, 'write_reg': dest,
-                'imm_data': (imm, op_imm),
+               'imm_data': (imm, op_imm),
                'read_reg1': src1, 'read_reg2': src2}]
 
     sendlen = 1
@@ -926,11 +930,11 @@ def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest,
         yield dut.data_i[idx].insn_type.eq(insn_type)
         yield dut.data_i[idx].fn_unit.eq(fn_unit)
         yield dut.data_i[idx].read_reg1.data.eq(reg1)
-        yield dut.data_i[idx].read_reg1.ok.eq(1) # XXX TODO
+        yield dut.data_i[idx].read_reg1.ok.eq(1)  # XXX TODO
         yield dut.data_i[idx].read_reg2.data.eq(reg2)
-        yield dut.data_i[idx].read_reg2.ok.eq(1) # XXX TODO
+        yield dut.data_i[idx].read_reg2.ok.eq(1)  # XXX TODO
         yield dut.data_i[idx].write_reg.data.eq(dest)
-        yield dut.data_i[idx].write_reg.ok.eq(1) # XXX TODO
+        yield dut.data_i[idx].write_reg.ok.eq(1)  # XXX TODO
         yield dut.data_i[idx].imm_data.data.eq(imm)
         yield dut.data_i[idx].imm_data.ok.eq(op_imm)
         di = yield dut.data_i[idx]
@@ -1149,7 +1153,7 @@ def power_sim(m, dut, pdecode2, instruction, alusim):
         for i in range(1, dut.n_regs):
             #val = randint(0, (1<<alusim.rwidth)-1)
             #val = 31+i*3
-            val = i # XXX actually, not random at all
+            val = i  # XXX actually, not random at all
             yield dut.intregs.regs[i].reg.eq(val)
             alusim.setval(i, val)
 
@@ -1157,14 +1161,14 @@ def power_sim(m, dut, pdecode2, instruction, alusim):
         lst = []
         if False:
             lst += ["addi 2, 0, 0x4321",
-                   "addi 3, 0, 0x1234",
-                   "add  1, 3, 2",
-                   "add  4, 3, 5"
+                    "addi 3, 0, 0x1234",
+                    "add  1, 3, 2",
+                    "add  4, 3, 5"
                     ]
         if True:
-            lst += [ "lbzu 6, 7(2)",
-                     
-                   ]
+            lst += ["lbzu 6, 7(2)",
+
+                    ]
 
         with Program(lst) as program:
             gen = program.generate_instructions()
@@ -1172,7 +1176,7 @@ def power_sim(m, dut, pdecode2, instruction, alusim):
             # issue instruction(s), wait for issue to be free before proceeding
             for ins, code in zip(gen, program.assembly.splitlines()):
                 yield instruction.eq(ins)          # raw binary instr.
-                yield #Delay(1e-6)
+                yield  # Delay(1e-6)
 
                 print("binary 0x{:X}".format(ins & 0xffffffff))
                 print("assembly", code)
@@ -1332,7 +1336,7 @@ def scoreboard_sim(dut, alusim):
 
         # issue instruction(s), wait for issue to be free before proceeding
         for i, instr in enumerate(instrs):
-            print (i, instr)
+            print(i, instr)
             src1, src2, dest, op, fn_unit, opi, imm, (br_ok, br_fail) = instr
 
             print("instr %d: (%d, %d, %d, %s, %s, %d, %d)" %
@@ -1385,7 +1389,7 @@ def test_scoreboard():
     run_simulation(m, power_sim(m, dut, pdecode2, instruction, alusim),
                    vcd_name='test_powerboard6600.vcd')
 
-    #run_simulation(dut, scoreboard_sim(dut, alusim),
+    # run_simulation(dut, scoreboard_sim(dut, alusim),
     #               vcd_name='test_scoreboard6600.vcd')
 
     # run_simulation(dut, scoreboard_branch_sim(dut, alusim),
diff --git a/src/soc/experiment/sim.py b/src/soc/experiment/sim.py
index 2c2197aa..aebb51de 100644
--- a/src/soc/experiment/sim.py
+++ b/src/soc/experiment/sim.py
@@ -8,16 +8,15 @@ from math import log
 class MemSim:
     def __init__(self, regwid, addrw):
         self.regwid = regwid
-        self.ddepth = 1 # regwid//8
-        depth = (1<<addrw) // self.ddepth
+        self.ddepth = 1  # regwid//8
+        depth = (1 << addrw) // self.ddepth
         self.mem = list(range(0, depth))
 
     def ld(self, addr):
-        return self.mem[addr>>self.ddepth]
+        return self.mem[addr >> self.ddepth]
 
     def st(self, addr, data):
-        self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
-
+        self.mem[addr >> self.ddepth] = data & ((1 << self.regwid)-1)
 
 
 IADD = 0
@@ -36,7 +35,7 @@ class RegSim:
         self.regs = [0] * nregs
 
     def op(self, op, op_imm, imm, src1, src2, dest):
-        print ("regsim op src1, src2", op, op_imm, imm, src1, src2, dest)
+        print("regsim op src1, src2", op, op_imm, imm, src1, src2, dest)
         maxbits = (1 << self.rwidth) - 1
         src1 = self.regs[src1] & maxbits
         if op_imm:
@@ -47,7 +46,7 @@ class RegSim:
             val = src1 + src2
         elif op == MicrOp.OP_MUL_L64:
             val = src1 * src2
-            print ("mul src1, src2", src1, src2, val)
+            print("mul src1, src2", src1, src2, val)
         elif op == ISUB:
             val = src1 - src2
         elif op == ISHF:
@@ -61,13 +60,13 @@ class RegSim:
         elif op == IBNE:
             val = int(src1 != src2)
         else:
-            return 0 # LD/ST TODO
+            return 0  # LD/ST TODO
         val &= maxbits
         self.setval(dest, val)
         return val
 
     def setval(self, dest, val):
-        print ("sim setval", dest, hex(val))
+        print("sim setval", dest, hex(val))
         self.regs[dest] = val
 
     def dump(self, dut):
@@ -83,4 +82,3 @@ class RegSim:
                 print("reg %d expected %x received %x\n" % (i, val, reg))
                 yield from self.dump(dut)
                 assert False
-
diff --git a/src/soc/experiment/test/test_compalu_multi.py b/src/soc/experiment/test/test_compalu_multi.py
index af79aa28..4054de4c 100644
--- a/src/soc/experiment/test/test_compalu_multi.py
+++ b/src/soc/experiment/test/test_compalu_multi.py
@@ -10,21 +10,19 @@ its result(s) have been successfully stored in the regfile(s)
 Documented at http://libre-soc.org/3d_gpu/architecture/compunit
 """
 
+from soc.experiment.alu_fsm import Shifter, CompFSMOpSubset
+from soc.fu.alu.alu_input_record import CompALUOpSubset
+from soc.experiment.alu_hier import ALU, DummyALU
+from soc.experiment.compalu_multi import MultiCompUnit
+from soc.decoder.power_enums import MicrOp
+from nmigen import Module
+from nmigen.cli import rtlil
 cxxsim = False
 if cxxsim:
     from nmigen.sim.cxxsim import Simulator, Settle
 else:
     from nmigen.back.pysim import Simulator, Settle
 
-from nmigen.cli import rtlil
-from nmigen import Module
-
-from soc.decoder.power_enums import MicrOp
-
-from soc.experiment.compalu_multi import MultiCompUnit
-from soc.experiment.alu_hier import ALU, DummyALU
-from soc.fu.alu.alu_input_record import CompALUOpSubset
-from soc.experiment.alu_fsm import Shifter, CompFSMOpSubset
 
 def wrap(process):
     def wrapper():
@@ -33,7 +31,7 @@ def wrap(process):
 
 
 def op_sim_fsm(dut, a, b, direction):
-    print ("op_sim_fsm", a, b, direction)
+    print("op_sim_fsm", a, b, direction)
     yield dut.issue_i.eq(0)
     yield
     yield dut.src_i[0].eq(a)
@@ -48,18 +46,18 @@ def op_sim_fsm(dut, a, b, direction):
     while True:
         yield
         rd_rel_o = yield dut.rd.rel
-        print ("rd_rel", rd_rel_o)
+        print("rd_rel", rd_rel_o)
         if rd_rel_o:
             break
     yield dut.rd.go.eq(0)
 
     req_rel_o = yield dut.wr.rel
     result = yield dut.data_o
-    print ("req_rel", req_rel_o, result)
+    print("req_rel", req_rel_o, result)
     while True:
         req_rel_o = yield dut.wr.rel
         result = yield dut.data_o
-        print ("req_rel", req_rel_o, result)
+        print("req_rel", req_rel_o, result)
         if req_rel_o:
             break
         yield
@@ -67,7 +65,7 @@ def op_sim_fsm(dut, a, b, direction):
     yield Settle()
     result = yield dut.data_o
     yield
-    print ("result", result)
+    print("result", result)
     yield dut.wr.go[0].eq(0)
     yield
     return result
@@ -92,32 +90,32 @@ def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0):
         while True:
             yield
             rd_rel_o = yield dut.rd.rel
-            print ("rd_rel", rd_rel_o)
+            print("rd_rel", rd_rel_o)
             if rd_rel_o:
                 break
         yield dut.rd.go.eq(0)
     else:
-        print ("no go rd")
+        print("no go rd")
 
     if len(dut.src_i) == 3:
         yield dut.rd.go.eq(0b100)
         while True:
             yield
             rd_rel_o = yield dut.rd.rel
-            print ("rd_rel", rd_rel_o)
+            print("rd_rel", rd_rel_o)
             if rd_rel_o:
                 break
         yield dut.rd.go.eq(0)
     else:
-        print ("no 3rd rd")
+        print("no 3rd rd")
 
     req_rel_o = yield dut.wr.rel
     result = yield dut.data_o
-    print ("req_rel", req_rel_o, result)
+    print("req_rel", req_rel_o, result)
     while True:
         req_rel_o = yield dut.wr.rel
         result = yield dut.data_o
-        print ("req_rel", req_rel_o, result)
+        print("req_rel", req_rel_o, result)
         if req_rel_o:
             break
         yield
@@ -125,7 +123,7 @@ def op_sim(dut, a, b, op, inv_a=0, imm=0, imm_ok=0, zero_a=0):
     yield Settle()
     result = yield dut.data_o
     yield
-    print ("result", result)
+    print("result", result)
     yield dut.wr.go[0].eq(0)
     yield
     return result
@@ -144,23 +142,22 @@ def scoreboard_sim_fsm(dut):
 
 def scoreboard_sim_dummy(dut):
     result = yield from op_sim(dut, 5, 2, MicrOp.OP_NOP, inv_a=0,
-                                    imm=8, imm_ok=1)
+                               imm=8, imm_ok=1)
     assert result == 5, result
 
     result = yield from op_sim(dut, 9, 2, MicrOp.OP_NOP, inv_a=0,
-                                    imm=8, imm_ok=1)
+                               imm=8, imm_ok=1)
     assert result == 9, result
 
 
-
 def scoreboard_sim(dut):
     # zero (no) input operands test
     result = yield from op_sim(dut, 5, 2, MicrOp.OP_ADD, zero_a=1,
-                                    imm=8, imm_ok=1)
+                               imm=8, imm_ok=1)
     assert result == 8
 
     result = yield from op_sim(dut, 5, 2, MicrOp.OP_ADD, inv_a=0,
-                                    imm=8, imm_ok=1)
+                               imm=8, imm_ok=1)
     assert result == 13
 
     result = yield from op_sim(dut, 5, 2, MicrOp.OP_ADD)
@@ -433,9 +430,9 @@ def test_compunit_regspec2_fsm():
 
     inspec = [('INT', 'a', '0:15'),
               ('INT', 'b', '0:15'),
-            ]
-    outspec = [('INT', 'o', '0:15'),
               ]
+    outspec = [('INT', 'o', '0:15'),
+               ]
 
     regspec = (inspec, outspec)
 
@@ -459,7 +456,7 @@ def test_compunit_regspec3():
               ('INT', 'b', '0:15'),
               ('INT', 'c', '0:15')]
     outspec = [('INT', 'o', '0:15'),
-              ]
+               ]
 
     regspec = (inspec, outspec)
 
@@ -482,7 +479,7 @@ def test_compunit_regspec1():
     inspec = [('INT', 'a', '0:15'),
               ('INT', 'b', '0:15')]
     outspec = [('INT', 'o', '0:15'),
-              ]
+               ]
 
     regspec = (inspec, outspec)
 
diff --git a/src/soc/experiment/testmem.py b/src/soc/experiment/testmem.py
index 1e645742..12b04915 100644
--- a/src/soc/experiment/testmem.py
+++ b/src/soc/experiment/testmem.py
@@ -3,19 +3,19 @@ from nmigen import Module, Elaboratable, Memory
 
 class TestMemory(Elaboratable):
     def __init__(self, regwid, addrw, granularity=None, init=True,
-                                      readonly=False):
+                 readonly=False):
         self.readonly = readonly
-        self.ddepth = 1 # regwid //8
-        depth = (1<<addrw) // self.ddepth
+        self.ddepth = 1  # regwid //8
+        depth = (1 << addrw) // self.ddepth
         self.depth = depth
         self.regwid = regwid
-        print ("test memory width depth", regwid, depth)
+        print("test memory width depth", regwid, depth)
         if init is True:
             init = range(0, depth*2, 2)
         else:
             init = None
         self.mem = Memory(width=regwid, depth=depth, init=init)
-        self.rdport = self.mem.read_port() # not now transparent=False)
+        self.rdport = self.mem.read_port()  # not now transparent=False)
         if self.readonly:
             return
         self.wrport = self.mem.write_port(granularity=granularity)
@@ -31,7 +31,7 @@ class TestMemory(Elaboratable):
     def __iter__(self):
         yield self.rdport.addr
         yield self.rdport.data
-        #yield self.rdport.en
+        # yield self.rdport.en
         if self.readonly:
             return
         yield self.wrport.addr
diff --git a/src/soc/fu/alu/test/test_pipe_caller.py b/src/soc/fu/alu/test/test_pipe_caller.py
index d914010a..37f59d64 100644
--- a/src/soc/fu/alu/test/test_pipe_caller.py
+++ b/src/soc/fu/alu/test/test_pipe_caller.py
@@ -1,3 +1,18 @@
+import random
+from soc.fu.alu.pipe_data import ALUPipeSpec
+from soc.fu.alu.pipeline import ALUBasePipe
+from soc.fu.test.common import (TestCase, ALUHelpers)
+from soc.config.endian import bigendian
+from soc.decoder.isa.all import ISA
+from soc.simulator.program import Program
+from soc.decoder.selectable_int import SelectableInt
+from soc.decoder.power_enums import (XER_bits, Function, MicrOp, CryIn)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.isa.caller import ISACaller, special_sprs
+import unittest
+from nmigen.cli import rtlil
+from nmutil.formaltest import FHDLTestCase
 from nmigen import Module, Signal
 from nmigen.back.pysim import Delay, Settle
 # NOTE: to use this (set to True), at present it is necessary to check
@@ -13,40 +28,22 @@ if cxxsim:
 else:
     from nmigen.back.pysim import Simulator
 
-from nmutil.formaltest import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function, MicrOp, CryIn)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-from soc.config.endian import bigendian
-
-from soc.fu.test.common import (TestCase, ALUHelpers)
-from soc.fu.alu.pipeline import ALUBasePipe
-from soc.fu.alu.pipe_data import ALUPipeSpec
-import random
-
 
 def get_cu_inputs(dec2, sim):
     """naming (res) must conform to ALUFunctionUnit input regspec
     """
     res = {}
 
-    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
-    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
-    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca
-    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2)  # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2)  # RB
+    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2)  # XER.ca
+    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2)  # XER.so
 
-    print ("alu get_cu_inputs", res)
+    print("alu get_cu_inputs", res)
 
     return res
 
 
-
 def set_alu_inputs(alu, dec2, sim):
     # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
     # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
@@ -126,27 +123,27 @@ class ALUTestCase(FHDLTestCase):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rand_imm(self):
         insns = ["addi", "addis", "subfic"]
         for i in range(10):
             choice = random.choice(insns)
-            imm = random.randint(-(1<<15), (1<<15)-1)
+            imm = random.randint(-(1 << 15), (1 << 15)-1)
             lst = [f"{choice} 3, 1, {imm}"]
             print(lst)
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_0_adde(self):
         lst = ["adde. 5, 6, 7"]
         for i in range(10):
             initial_regs = [0] * 32
-            initial_regs[6] = random.randint(0, (1<<64)-1)
-            initial_regs[7] = random.randint(0, (1<<64)-1)
+            initial_regs[6] = random.randint(0, (1 << 64)-1)
+            initial_regs[7] = random.randint(0, (1 << 64)-1)
             initial_sprs = {}
             xer = SelectableInt(0, 64)
             xer[XER_bits['CA']] = 1
@@ -169,7 +166,7 @@ class ALUTestCase(FHDLTestCase):
             lst = [f"{choice} 3, 1"]
             print(lst)
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_cmpeqb(self):
@@ -212,14 +209,15 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
                 sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
-                                test.mem, test.msr,
-                                bigendian=bigendian)
+                          test.mem, test.msr,
+                          bigendian=bigendian)
                 gen = program.generate_instructions()
                 instructions = list(zip(gen, program.assembly.splitlines()))
 
@@ -233,7 +231,7 @@ class TestRunner(FHDLTestCase):
                         so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
                         ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
                         ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
-                        print ("before: so/ov/32", so, ov, ov32)
+                        print("before: so/ov/32", so, ov, ov32)
 
                     # ask the decoder to decode this binary data (endian'd)
                     yield pdecode2.dec.bigendian.eq(bigendian)  # little / big?
@@ -265,7 +263,7 @@ class TestRunner(FHDLTestCase):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
         if rc:
             self.assertEqual(cridx, 0, code)
 
diff --git a/src/soc/fu/branch/test/test_pipe_caller.py b/src/soc/fu/branch/test/test_pipe_caller.py
index b29d8a45..b8aa16eb 100644
--- a/src/soc/fu/branch/test/test_pipe_caller.py
+++ b/src/soc/fu/branch/test/test_pipe_caller.py
@@ -18,7 +18,7 @@ from soc.fu.branch.pipeline import BranchBasePipe
 from soc.fu.branch.pipe_data import BranchPipeSpec
 import random
 
-from soc.regfile.util import fast_reg_to_spr # HACK!
+from soc.regfile.util import fast_reg_to_spr  # HACK!
 
 
 def get_rec_width(rec):
@@ -61,12 +61,13 @@ def get_cu_inputs(dec2, sim):
     yield from ALUHelpers.get_sim_fast_spr2(res, sim, dec2)
     yield from ALUHelpers.get_sim_cr_a(res, sim, dec2)
 
-    print ("get inputs", res)
+    print("get inputs", res)
     return res
 
 
 class BranchTestCase(FHDLTestCase):
     test_data = []
+
     def __init__(self, name):
         super().__init__(name)
         self.test_name = name
@@ -79,7 +80,7 @@ class BranchTestCase(FHDLTestCase):
 
     def test_0_regression_unconditional(self):
         for i in range(2):
-            imm = random.randrange(-1<<23, (1<<23)-1) * 4
+            imm = random.randrange(-1 << 23, (1 << 23)-1) * 4
             lst = [f"bl {imm}"]
             initial_regs = [0] * 32
             self.run_tst_program(Program(lst, bigendian), initial_regs)
@@ -88,30 +89,30 @@ class BranchTestCase(FHDLTestCase):
         choices = ["b", "ba", "bl", "bla"]
         for i in range(20):
             choice = random.choice(choices)
-            imm = random.randrange(-1<<23, (1<<23)-1) * 4
+            imm = random.randrange(-1 << 23, (1 << 23)-1) * 4
             lst = [f"{choice} {imm}"]
             initial_regs = [0] * 32
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_bc_cr(self):
         for i in range(20):
-            bc = random.randrange(-1<<13, (1<<13)-1) * 4
+            bc = random.randrange(-1 << 13, (1 << 13)-1) * 4
             bo = random.choice([0b01100, 0b00100, 0b10100])
             bi = random.randrange(0, 31)
-            cr = random.randrange(0, (1<<32)-1)
+            cr = random.randrange(0, (1 << 32)-1)
             lst = [f"bc {bo}, {bi}, {bc}"]
             initial_regs = [0] * 32
             self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
     def test_bc_ctr(self):
         for i in range(20):
-            bc = random.randrange(-1<<13, (1<<13)-1) * 4
+            bc = random.randrange(-1 << 13, (1 << 13)-1) * 4
             bo = random.choice([0, 2, 8, 10, 16, 18])
             bi = random.randrange(0, 31)
-            cr = random.randrange(0, (1<<32)-1)
-            ctr = random.randint(0, (1<<32)-1)
+            cr = random.randrange(0, (1 << 32)-1)
+            ctr = random.randint(0, (1 << 32)-1)
             lst = [f"bc {bo}, {bi}, {bc}"]
-            initial_sprs={9: SelectableInt(ctr, 64)}
+            initial_sprs = {9: SelectableInt(ctr, 64)}
             self.run_tst_program(Program(lst, bigendian),
                                  initial_sprs=initial_sprs,
                                  initial_cr=cr)
@@ -124,14 +125,14 @@ class BranchTestCase(FHDLTestCase):
                 bh = random.randrange(0, 3)
                 bo = random.choice([4, 12])
                 bi = random.randrange(0, 31)
-                cr = random.randrange(0, (1<<32)-1)
-                ctr = random.randint(0, (1<<32)-1)
-                lr = random.randint(0, (1<<64)-1) & ~3
-                tar = random.randint(0, (1<<64)-1) & ~3
+                cr = random.randrange(0, (1 << 32)-1)
+                ctr = random.randint(0, (1 << 32)-1)
+                lr = random.randint(0, (1 << 64)-1) & ~3
+                tar = random.randint(0, (1 << 64)-1) & ~3
                 lst = [f"{insn} {bo}, {bi}, {bh}"]
-                initial_sprs={9: SelectableInt(ctr, 64),
-                              8: SelectableInt(lr, 64),
-                              815: SelectableInt(tar, 64)}
+                initial_sprs = {9: SelectableInt(ctr, 64),
+                                8: SelectableInt(lr, 64),
+                                815: SelectableInt(tar, 64)}
                 self.run_tst_program(Program(lst, bigendian),
                                      initial_sprs=initial_sprs,
                                      initial_cr=cr)
@@ -168,6 +169,7 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print(test.name)
@@ -193,8 +195,8 @@ class TestRunner(FHDLTestCase):
 
                     # ask the decoder to decode this binary data (endian'd)
                     yield pdecode2.dec.bigendian.eq(bigendian)  # little / big?
-                    yield pdecode2.msr.eq(msr) # set MSR in pdecode2
-                    yield pdecode2.cia.eq(pc) # set PC in pdecode2
+                    yield pdecode2.msr.eq(msr)  # set MSR in pdecode2
+                    yield pdecode2.cia.eq(pc)  # set PC in pdecode2
                     yield instruction.eq(ins)          # raw binary instr.
                     # note, here, the op will need further decoding in order
                     # to set the correct SPRs on SPR1/2/3.  op_bc* require
@@ -204,7 +206,7 @@ class TestRunner(FHDLTestCase):
                     # then additional op-decoding is required, accordingly
                     yield Settle()
                     lk = yield pdecode2.e.do.lk
-                    print ("lk:", lk)
+                    print("lk:", lk)
                     yield from self.set_inputs(branch, pdecode2, simulator)
                     fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.BRANCH.value, code)
diff --git a/src/soc/fu/compunits/test/test_alu_compunit.py b/src/soc/fu/compunits/test/test_alu_compunit.py
index 001f7392..d786f488 100644
--- a/src/soc/fu/compunits/test/test_alu_compunit.py
+++ b/src/soc/fu/compunits/test/test_alu_compunit.py
@@ -2,7 +2,7 @@ import unittest
 from soc.decoder.power_enums import (XER_bits, Function)
 
 from soc.fu.alu.test.test_pipe_caller import get_cu_inputs
-from soc.fu.alu.test.test_pipe_caller import ALUTestCase # creates the tests
+from soc.fu.alu.test.test_pipe_caller import ALUTestCase  # creates the tests
 
 from soc.fu.test.common import ALUHelpers
 from soc.fu.compunits.compunits import ALUFunctionUnit
@@ -30,7 +30,7 @@ class ALUTestRunner(TestRunner):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
 
         if rc:
             self.assertEqual(cridx_ok, 1, code)
diff --git a/src/soc/fu/compunits/test/test_branch_compunit.py b/src/soc/fu/compunits/test/test_branch_compunit.py
index 63d19680..2f781c16 100644
--- a/src/soc/fu/compunits/test/test_branch_compunit.py
+++ b/src/soc/fu/compunits/test/test_branch_compunit.py
@@ -8,7 +8,7 @@ from soc.fu.compunits.compunits import BranchFunctionUnit
 from soc.fu.compunits.test.test_compunit import TestRunner
 from soc.config.endian import bigendian
 
-from soc.regfile.util import fast_reg_to_spr # HACK!
+from soc.regfile.util import fast_reg_to_spr  # HACK!
 
 """
     def assert_outputs(self, branch, dec2, sim, prev_nia, code):
@@ -30,7 +30,7 @@ class BranchTestRunner(TestRunner):
         """naming (res) must conform to BranchFunctionUnit output regspec
         """
 
-        print ("check extra output", repr(code), res)
+        print("check extra output", repr(code), res)
 
         # NIA (next instruction address aka PC)
         branch_taken = 'nia' in res
diff --git a/src/soc/fu/compunits/test/test_compunit.py b/src/soc/fu/compunits/test/test_compunit.py
index 135e6fe5..e96f4c70 100644
--- a/src/soc/fu/compunits/test/test_compunit.py
+++ b/src/soc/fu/compunits/test/test_compunit.py
@@ -8,7 +8,7 @@ from soc.decoder.power_decoder2 import (PowerDecode2)
 from soc.decoder.power_enums import Function
 from soc.decoder.isa.all import ISA
 
-from soc.experiment.compalu_multi import find_ok # hack
+from soc.experiment.compalu_multi import find_ok  # hack
 from soc.config.test.test_loadstore import TestMemPspec
 
 
@@ -17,7 +17,7 @@ def set_cu_input(cu, idx, data):
     yield cu.src_i[idx].eq(data)
     while True:
         rd_rel_o = yield cu.rd.rel[idx]
-        print ("rd_rel %d wait HI" % idx, rd_rel_o, rdop, hex(data))
+        print("rd_rel %d wait HI" % idx, rd_rel_o, rdop, hex(data))
         if rd_rel_o:
             break
         yield
@@ -27,7 +27,7 @@ def set_cu_input(cu, idx, data):
         rd_rel_o = yield cu.rd.rel[idx]
         if rd_rel_o:
             break
-        print ("rd_rel %d wait HI" % idx, rd_rel_o)
+        print("rd_rel %d wait HI" % idx, rd_rel_o)
         yield
     yield cu.rd.go[idx].eq(0)
     yield cu.src_i[idx].eq(0)
@@ -39,15 +39,15 @@ def get_cu_output(cu, idx, code):
     wrok = cu.get_out(idx)
     fname = find_ok(wrok.fields)
     wrok = yield getattr(wrok, fname)
-    print ("wr_rel mask", repr(code), idx, wrop, bin(wrmask), fname, wrok)
-    assert wrmask & (1<<idx), \
-            "get_cu_output '%s': mask bit %d not set\n" \
-            "write-operand '%s' Data.ok likely not set (%s)" \
-            % (code, idx, wrop, hex(wrok))
+    print("wr_rel mask", repr(code), idx, wrop, bin(wrmask), fname, wrok)
+    assert wrmask & (1 << idx), \
+        "get_cu_output '%s': mask bit %d not set\n" \
+        "write-operand '%s' Data.ok likely not set (%s)" \
+        % (code, idx, wrop, hex(wrok))
     while True:
         wr_relall_o = yield cu.wr.rel
         wr_rel_o = yield cu.wr.rel[idx]
-        print ("wr_rel %d wait" % idx, hex(wr_relall_o), wr_rel_o)
+        print("wr_rel %d wait" % idx, hex(wr_relall_o), wr_rel_o)
         if wr_rel_o:
             break
         yield
@@ -56,19 +56,18 @@ def get_cu_output(cu, idx, code):
     result = yield cu.dest[idx]
     yield
     yield cu.wr.go[idx].eq(0)
-    print ("result", repr(code), idx, wrop, wrok, hex(result))
+    print("result", repr(code), idx, wrop, wrok, hex(result))
 
     return result
 
 
 def set_cu_inputs(cu, inp):
-    print ("set_cu_inputs", inp)
+    print("set_cu_inputs", inp)
     for idx, data in inp.items():
         yield from set_cu_input(cu, idx, data)
     # gets out of sync when checking busy if there is no wait, here.
     if len(inp) == 0:
-        yield # wait one cycle
-
+        yield  # wait one cycle
 
 
 def set_operand(cu, dec2, sim):
@@ -83,8 +82,9 @@ def get_cu_outputs(cu, code):
     res = {}
     wrmask = yield cu.wrmask
     wr_rel_o = yield cu.wr.rel
-    print ("get_cu_outputs", cu.n_dst, wrmask, wr_rel_o)
-    if not wrmask: # no point waiting (however really should doublecheck wr.rel)
+    print("get_cu_outputs", cu.n_dst, wrmask, wr_rel_o)
+    # no point waiting (however really should doublecheck wr.rel)
+    if not wrmask:
         return {}
     # wait for at least one result
     while True:
@@ -97,7 +97,7 @@ def get_cu_outputs(cu, code):
         if wr_rel_o:
             result = yield from get_cu_output(cu, i, code)
             wrop = cu.get_out_name(i)
-            print ("output", i, wrop, hex(result))
+            print("output", i, wrop, hex(result))
             res[wrop] = result
     return res
 
@@ -110,36 +110,38 @@ def get_inp_indexed(cu, inp):
             res[i] = inp[wrop]
     return res
 
-def get_l0_mem(l0): # BLECH!
+
+def get_l0_mem(l0):  # BLECH!
     if hasattr(l0.pimem, 'lsui'):
         return l0.pimem.lsui.mem
     return l0.pimem.mem.mem
 
+
 def setup_test_memory(l0, sim):
     mem = get_l0_mem(l0)
-    print ("before, init mem", mem.depth, mem.width, mem)
+    print("before, init mem", mem.depth, mem.width, mem)
     for i in range(mem.depth):
         data = sim.mem.ld(i*8, 8, False)
-        print ("init ", i, hex(data))
+        print("init ", i, hex(data))
         yield mem._array[i].eq(data)
     yield Settle()
     for k, v in sim.mem.mem.items():
-        print ("    %6x %016x" % (k, v))
-    print ("before, nmigen mem dump")
+        print("    %6x %016x" % (k, v))
+    print("before, nmigen mem dump")
     for i in range(mem.depth):
         actual_mem = yield mem._array[i]
-        print ("    %6i %016x" % (i, actual_mem))
+        print("    %6i %016x" % (i, actual_mem))
 
 
 def dump_sim_memory(dut, l0, sim, code):
     mem = get_l0_mem(l0)
-    print ("sim mem dump")
+    print("sim mem dump")
     for k, v in sim.mem.mem.items():
-        print ("    %6x %016x" % (k, v))
-    print ("nmigen mem dump")
+        print("    %6x %016x" % (k, v))
+    print("nmigen mem dump")
     for i in range(mem.depth):
         actual_mem = yield mem._array[i]
-        print ("    %6i %016x" % (i, actual_mem))
+        print("    %6i %016x" % (i, actual_mem))
 
 
 def check_sim_memory(dut, l0, sim, code):
@@ -149,8 +151,9 @@ def check_sim_memory(dut, l0, sim, code):
         expected_mem = sim.mem.ld(i*8, 8, False)
         actual_mem = yield mem._array[i]
         dut.assertEqual(expected_mem, actual_mem,
-                "%s %d %x %x" % (code, i,
-                                 expected_mem, actual_mem))
+                        "%s %d %x %x" % (code, i,
+                                         expected_mem, actual_mem))
+
 
 class TestRunner(FHDLTestCase):
     def __init__(self, test_data, fukls, iodef, funit, bigendian):
@@ -172,7 +175,7 @@ class TestRunner(FHDLTestCase):
         # copy of the decoder for simulator
         simdec = create_pdecode()
         simdec2 = PowerDecode2(simdec)
-        m.submodules.simdec2 = simdec2 # pain in the neck
+        m.submodules.simdec2 = simdec2  # pain in the neck
 
         if self.funit == Function.LDST:
             from soc.experiment.l0_cache import TstL0CacheBuffer
@@ -183,8 +186,8 @@ class TestRunner(FHDLTestCase):
             m.submodules.l0 = l0 = TstL0CacheBuffer(pspec, n_units=1)
             pi = l0.l0.dports[0]
             m.submodules.cu = cu = self.fukls(pi, idx=0, awid=3)
-            m.d.comb += cu.ad.go.eq(cu.ad.rel) # link addr-go direct to rel
-            m.d.comb += cu.st.go.eq(cu.st.rel) # link store-go direct to rel
+            m.d.comb += cu.ad.go.eq(cu.ad.rel)  # link addr-go direct to rel
+            m.d.comb += cu.st.go.eq(cu.st.rel)  # link store-go direct to rel
         else:
             m.submodules.cu = cu = self.fukls(0)
 
@@ -201,7 +204,7 @@ class TestRunner(FHDLTestCase):
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
-                print ("test", test.name, test.mem)
+                print("test", test.name, test.mem)
                 gen = list(program.generate_instructions())
                 insncode = program.assembly.splitlines()
                 instructions = list(zip(gen, insncode))
@@ -222,7 +225,7 @@ class TestRunner(FHDLTestCase):
                     print("instr pc", pc)
                     try:
                         yield from sim.setup_one()
-                    except KeyError: # indicates instruction not in imem: stop
+                    except KeyError:  # indicates instruction not in imem: stop
                         break
                     yield Settle()
                     ins, code = instructions[index]
@@ -239,10 +242,10 @@ class TestRunner(FHDLTestCase):
                         lk = yield pdecode2.e.do.lk
                         fast_out2 = yield pdecode2.e.write_fast2.data
                         fast_out2_ok = yield pdecode2.e.write_fast2.ok
-                        print ("lk:", lk, fast_out2, fast_out2_ok)
+                        print("lk:", lk, fast_out2, fast_out2_ok)
                         op_lk = yield cu.alu.pipe1.p.data_i.ctx.op.lk
-                        print ("op_lk:", op_lk)
-                        print (dir(cu.alu.pipe1.n.data_o))
+                        print("op_lk:", op_lk)
+                        print(dir(cu.alu.pipe1.n.data_o))
                     fn_unit = yield pdecode2.e.do.fn_unit
                     fuval = self.funit.value
                     self.assertEqual(fn_unit & fuval, fuval)
@@ -270,18 +273,18 @@ class TestRunner(FHDLTestCase):
                     # set inputs into CU
                     rd_rel_o = yield cu.rd.rel
                     wr_rel_o = yield cu.wr.rel
-                    print ("before inputs, rd_rel, wr_rel: ",
-                            bin(rd_rel_o), bin(wr_rel_o))
+                    print("before inputs, rd_rel, wr_rel: ",
+                          bin(rd_rel_o), bin(wr_rel_o))
                     assert wr_rel_o == 0, "wr.rel %s must be zero. "\
-                                "previous instr not written all regs\n"\
-                                "respec %s" % \
-                                (bin(wr_rel_o), cu.rwid[1])
+                        "previous instr not written all regs\n"\
+                        "respec %s" % \
+                        (bin(wr_rel_o), cu.rwid[1])
                     yield from set_cu_inputs(cu, inp)
                     rd_rel_o = yield cu.rd.rel
                     wr_rel_o = yield cu.wr.rel
                     wrmask = yield cu.wrmask
-                    print ("after inputs, rd_rel, wr_rel, wrmask: ",
-                            bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))
+                    print("after inputs, rd_rel, wr_rel, wrmask: ",
+                          bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))
 
                     # call simulated operation
                     yield from sim.execute_one()
@@ -295,8 +298,8 @@ class TestRunner(FHDLTestCase):
                     wrmask = yield cu.wrmask
                     rd_rel_o = yield cu.rd.rel
                     wr_rel_o = yield cu.wr.rel
-                    print ("after got outputs, rd_rel, wr_rel, wrmask: ",
-                            bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))
+                    print("after got outputs, rd_rel, wr_rel, wrmask: ",
+                          bin(rd_rel_o), bin(wr_rel_o), bin(wrmask))
 
                     # reset read-mask.  IMPORTANT when there are no operands
                     yield cu.rdmaskn.eq(0)
@@ -304,7 +307,7 @@ class TestRunner(FHDLTestCase):
                     # wait for busy to go low
                     while True:
                         busy_o = yield cu.busy_o
-                        print ("busy", busy_o)
+                        print("busy", busy_o)
                         if not busy_o:
                             break
                         yield
@@ -314,29 +317,25 @@ class TestRunner(FHDLTestCase):
                     if self.funit == Function.BRANCH:
                         lr = yield cu.alu.pipe1.n.data_o.lr.data
                         lr_ok = yield cu.alu.pipe1.n.data_o.lr.ok
-                        print ("lr:", hex(lr), lr_ok)
+                        print("lr:", hex(lr), lr_ok)
 
                     if self.funit == Function.LDST:
                         yield from dump_sim_memory(self, l0, sim, code)
 
-
                     # sigh.  hard-coded.  test memory
                     if self.funit == Function.LDST:
                         yield from check_sim_memory(self, l0, sim, code)
                         yield from self.iodef.check_cu_outputs(res, pdecode2,
-                                                                sim, cu,
-                                                                code)
+                                                               sim, cu,
+                                                               code)
                     else:
                         yield from self.iodef.check_cu_outputs(res, pdecode2,
-                                                                sim, cu.alu,
-                                                                code)
-
+                                                               sim, cu.alu,
+                                                               code)
 
         sim.add_sync_process(process)
 
         name = self.funit.name.lower()
         with sim.write_vcd("%s_simulator.vcd" % name,
-                            traces=[]):
+                           traces=[]):
             sim.run()
-
-
diff --git a/src/soc/fu/compunits/test/test_cr_compunit.py b/src/soc/fu/compunits/test/test_cr_compunit.py
index 50e9d133..8d685835 100644
--- a/src/soc/fu/compunits/test/test_cr_compunit.py
+++ b/src/soc/fu/compunits/test/test_cr_compunit.py
@@ -25,7 +25,7 @@ class CRTestRunner(TestRunner):
         """naming (res) must conform to CRFunctionUnit output regspec
         """
 
-        print ("check extra output", repr(code), res)
+        print("check extra output", repr(code), res)
 
         # full CR
         whole_reg = yield dec2.e.do.write_cr_whole
diff --git a/src/soc/fu/compunits/test/test_ldst_compunit.py b/src/soc/fu/compunits/test/test_ldst_compunit.py
index 65a6be70..0f9211f2 100644
--- a/src/soc/fu/compunits/test/test_ldst_compunit.py
+++ b/src/soc/fu/compunits/test/test_ldst_compunit.py
@@ -24,14 +24,14 @@ class LDSTTestRunner(TestRunner):
         """naming (res) must conform to LDSTFunctionUnit output regspec
         """
 
-        print ("check cu outputs", code, res)
+        print("check cu outputs", code, res)
 
         rc = yield dec2.e.do.rc.data
         op = yield dec2.e.do.insn_type
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
 
         if rc:
             self.assertEqual(cridx_ok, 1, code)
diff --git a/src/soc/fu/compunits/test/test_logical_compunit.py b/src/soc/fu/compunits/test/test_logical_compunit.py
index 877f14c5..7bcfa499 100644
--- a/src/soc/fu/compunits/test/test_logical_compunit.py
+++ b/src/soc/fu/compunits/test/test_logical_compunit.py
@@ -30,7 +30,7 @@ class LogicalTestRunner(TestRunner):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
 
         if rc:
             self.assertEqual(cridx_ok, 1, code)
diff --git a/src/soc/fu/compunits/test/test_shiftrot_compunit.py b/src/soc/fu/compunits/test/test_shiftrot_compunit.py
index 2931c542..796c8855 100644
--- a/src/soc/fu/compunits/test/test_shiftrot_compunit.py
+++ b/src/soc/fu/compunits/test/test_shiftrot_compunit.py
@@ -25,7 +25,7 @@ class ShiftRotTestRunner(TestRunner):
         """naming (res) must conform to ShiftRotFunctionUnit output regspec
         """
 
-        print ("outputs", repr(code), res)
+        print("outputs", repr(code), res)
 
         # RT
         out_reg_valid = yield dec2.e.write_reg.ok
@@ -41,7 +41,7 @@ class ShiftRotTestRunner(TestRunner):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
 
         if rc:
             self.assertEqual(cridx_ok, 1, code)
@@ -51,7 +51,7 @@ class ShiftRotTestRunner(TestRunner):
         if cridx_ok:
             cr_expected = sim.crl[cridx].get_range().value
             cr_actual = res['cr_a']
-            print ("CR", cridx, cr_expected, cr_actual)
+            print("CR", cridx, cr_expected, cr_actual)
             self.assertEqual(cr_expected, cr_actual, "CR%d %s" % (cridx, code))
 
         # XER.ca
@@ -59,10 +59,10 @@ class ShiftRotTestRunner(TestRunner):
         if cry_out:
             expected_carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
             xer_ca = res['xer_ca']
-            real_carry = xer_ca & 0b1 # XXX CO not CO32
+            real_carry = xer_ca & 0b1  # XXX CO not CO32
             self.assertEqual(expected_carry, real_carry, code)
             expected_carry32 = 1 if sim.spr['XER'][XER_bits['CA32']] else 0
-            real_carry32 = bool(xer_ca & 0b10) # XXX CO32
+            real_carry32 = bool(xer_ca & 0b10)  # XXX CO32
             self.assertEqual(expected_carry32, real_carry32, code)
 
 
diff --git a/src/soc/fu/compunits/test/test_spr_compunit.py b/src/soc/fu/compunits/test/test_spr_compunit.py
index 087fc508..f70aeb00 100644
--- a/src/soc/fu/compunits/test/test_spr_compunit.py
+++ b/src/soc/fu/compunits/test/test_spr_compunit.py
@@ -2,7 +2,7 @@ import unittest
 from soc.decoder.power_enums import (XER_bits, Function)
 
 from soc.fu.spr.test.test_pipe_caller import get_cu_inputs
-from soc.fu.spr.test.test_pipe_caller import SPRTestCase # creates the tests
+from soc.fu.spr.test.test_pipe_caller import SPRTestCase  # creates the tests
 
 from soc.fu.test.common import ALUHelpers
 from soc.fu.compunits.compunits import SPRFunctionUnit
@@ -29,7 +29,7 @@ class SPRTestRunner(TestRunner):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
 
         if rc:
             self.assertEqual(cridx_ok, 1, code)
@@ -44,7 +44,7 @@ class SPRTestRunner(TestRunner):
         yield from ALUHelpers.get_xer_ca(res, alu, dec2)
         yield from ALUHelpers.get_xer_so(res, alu, dec2)
 
-        print ("output", res)
+        print("output", res)
 
         yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
         yield from ALUHelpers.get_wr_sim_xer_so(sim_o, sim, alu, dec2)
@@ -53,7 +53,7 @@ class SPRTestRunner(TestRunner):
         yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
         yield from ALUHelpers.get_wr_slow_spr1(sim_o, sim, dec2)
 
-        print ("sim output", sim_o)
+        print("sim output", sim_o)
 
         ALUHelpers.check_xer_ov(self, res, sim_o, code)
         ALUHelpers.check_xer_ca(self, res, sim_o, code)
diff --git a/src/soc/fu/compunits/test/test_trap_compunit.py b/src/soc/fu/compunits/test/test_trap_compunit.py
index 7857b828..9464dba4 100644
--- a/src/soc/fu/compunits/test/test_trap_compunit.py
+++ b/src/soc/fu/compunits/test/test_trap_compunit.py
@@ -2,13 +2,14 @@ import unittest
 from soc.decoder.power_enums import (XER_bits, Function)
 
 from soc.fu.trap.test.test_pipe_caller import get_cu_inputs
-from soc.fu.trap.test.test_pipe_caller import TrapTestCase # creates the tests
+from soc.fu.trap.test.test_pipe_caller import TrapTestCase  # creates the tests
 
 from soc.fu.test.common import ALUHelpers
 from soc.fu.compunits.compunits import TrapFunctionUnit
 from soc.fu.compunits.test.test_compunit import TestRunner
 from soc.config.endian import bigendian
 
+
 class TrapTestRunner(TestRunner):
     def __init__(self, test_data):
         super().__init__(test_data, TrapFunctionUnit, self,
@@ -32,7 +33,7 @@ class TrapTestRunner(TestRunner):
         ALUHelpers.get_sim_nia(sim_o, sim, dec2)
         ALUHelpers.get_sim_msr(sim_o, sim, dec2)
 
-        print ("sim output", sim_o)
+        print("sim output", sim_o)
 
         ALUHelpers.check_int_o(self, res, sim_o, code)
         ALUHelpers.check_fast_spr1(self, res, sim_o, code)
diff --git a/src/soc/fu/cr/test/test_pipe_caller.py b/src/soc/fu/cr/test/test_pipe_caller.py
index a87bb896..f933636f 100644
--- a/src/soc/fu/cr/test/test_pipe_caller.py
+++ b/src/soc/fu/cr/test/test_pipe_caller.py
@@ -18,7 +18,6 @@ from soc.fu.cr.pipe_data import CRPipeSpec
 import random
 
 
-
 # This test bench is a bit different than is usual. Initially when I
 # was writing it, I had all of the tests call a function to create a
 # device under test and simulator, initialize the dut, run the
@@ -40,6 +39,7 @@ import random
 
 class CRTestCase(FHDLTestCase):
     test_data = []
+
     def __init__(self, name):
         super().__init__(name)
         self.test_name = name
@@ -59,13 +59,13 @@ class CRTestCase(FHDLTestCase):
             bb = random.randint(0, 31)
             bt = random.randint(0, 31)
             lst = [f"{choice} {ba}, {bb}, {bt}"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
     def test_crand(self):
         for i in range(20):
             lst = ["crand 0, 11, 13"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
     def test_1_mcrf(self):
@@ -73,7 +73,7 @@ class CRTestCase(FHDLTestCase):
             src = random.randint(0, 7)
             dst = random.randint(0, 7)
             lst = [f"mcrf {src}, {dst}"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
         self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
     def test_0_mcrf(self):
@@ -86,42 +86,43 @@ class CRTestCase(FHDLTestCase):
         for i in range(20):
             mask = random.randint(0, 255)
             lst = [f"mtcrf {mask}, 2"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             initial_regs = [0] * 32
-            initial_regs[2] = random.randint(0, (1<<32)-1)
+            initial_regs[2] = random.randint(0, (1 << 32)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs=initial_regs,
                                  initial_cr=cr)
+
     def test_mtocrf(self):
         for i in range(20):
-            mask = 1<<random.randint(0, 7)
+            mask = 1 << random.randint(0, 7)
             lst = [f"mtocrf {mask}, 2"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             initial_regs = [0] * 32
-            initial_regs[2] = random.randint(0, (1<<32)-1)
+            initial_regs[2] = random.randint(0, (1 << 32)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs=initial_regs,
                                  initial_cr=cr)
 
     def test_mfcr(self):
         for i in range(5):
             lst = ["mfcr 2"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
     def test_mfocrf(self):
         for i in range(20):
-            mask = 1<<random.randint(0, 7)
+            mask = 1 << random.randint(0, 7)
             lst = [f"mfocrf 2, {mask}"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
     def test_isel(self):
         for i in range(20):
             bc = random.randint(0, 31)
             lst = [f"isel 1, 2, 3, {bc}"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             initial_regs = [0] * 32
-            initial_regs[2] = random.randint(0, (1<<64)-1)
-            initial_regs[3] = random.randint(0, (1<<64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
+            initial_regs[3] = random.randint(0, (1 << 64)-1)
             #initial_regs[2] = i*2
             #initial_regs[3] = i*2+1
             self.run_tst_program(Program(lst, bigendian),
@@ -131,7 +132,7 @@ class CRTestCase(FHDLTestCase):
         for i in range(20):
             bfa = random.randint(0, 7)
             lst = [f"setb 1, {bfa}"]
-            cr = random.randint(0, (1<<32)-1)
+            cr = random.randint(0, (1 << 32)-1)
             self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
     def test_regression_setb(self):
@@ -139,7 +140,6 @@ class CRTestCase(FHDLTestCase):
         cr = random.randint(0, 0x66f6b106)
         self.run_tst_program(Program(lst, bigendian), initial_cr=cr)
 
-
     def test_ilang(self):
         pspec = CRPipeSpec(id_wid=2)
         alu = CRBasePipe(pspec)
@@ -187,7 +187,7 @@ def get_cu_inputs(dec2, sim):
         data2 = yield dec2.e.read_reg2.data
         res['rb'] = sim.gpr(data2).value
 
-    print ("get inputs", res)
+    print("get inputs", res)
     return res
 
 
@@ -247,6 +247,7 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print(test.name)
@@ -287,7 +288,7 @@ class TestRunner(FHDLTestCase):
 
         sim.add_sync_process(process)
         with sim.write_vcd("simulator.vcd", "simulator.gtkw",
-                            traces=[]):
+                           traces=[]):
             sim.run()
 
 
diff --git a/src/soc/fu/logical/test/test_pipe_caller.py b/src/soc/fu/logical/test/test_pipe_caller.py
index d75ba813..997d5a65 100644
--- a/src/soc/fu/logical/test/test_pipe_caller.py
+++ b/src/soc/fu/logical/test/test_pipe_caller.py
@@ -24,8 +24,8 @@ def get_cu_inputs(dec2, sim):
     """
     res = {}
 
-    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
-    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2)  # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2)  # RB
 
     return res
 
@@ -61,6 +61,7 @@ def set_alu_inputs(alu, dec2, sim):
 
 class LogicalTestCase(FHDLTestCase):
     test_data = []
+
     def __init__(self, name):
         super().__init__(name)
         self.test_name = name
@@ -139,7 +140,7 @@ class LogicalTestCase(FHDLTestCase):
         lst = ["bpermd 3, 1, 2"]
         for i in range(20):
             initial_regs = [0] * 32
-            initial_regs[1] = 1<<random.randint(0,63)
+            initial_regs[1] = 1 << random.randint(0, 63)
             initial_regs[2] = 0xdeadbeefcafec0de
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
@@ -226,7 +227,7 @@ class TestRunner(FHDLTestCase):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
         if rc:
             self.assertEqual(cridx, 0, code)
 
diff --git a/src/soc/fu/mul/test/test_pipe_caller.py b/src/soc/fu/mul/test/test_pipe_caller.py
index 56d3fed6..a09e6bad 100644
--- a/src/soc/fu/mul/test/test_pipe_caller.py
+++ b/src/soc/fu/mul/test/test_pipe_caller.py
@@ -23,23 +23,22 @@ def get_cu_inputs(dec2, sim):
     """
     res = {}
 
-    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
-    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
-    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2)  # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2)  # RB
+    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2)  # XER.so
 
-    print ("alu get_cu_inputs", res)
+    print("alu get_cu_inputs", res)
 
     return res
 
 
-
 def set_alu_inputs(alu, dec2, sim):
     # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
     # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
     # and place it into data_i.b
 
     inp = yield from get_cu_inputs(dec2, sim)
-    print ("set alu inputs", inp)
+    print("set alu inputs", inp)
     yield from ALUHelpers.set_int_ra(alu, dec2, inp)
     yield from ALUHelpers.set_int_rb(alu, dec2, inp)
 
@@ -95,8 +94,8 @@ class MulTestCase(FHDLTestCase):
     def test_2_mullwo(self):
         lst = [f"mullwo 3, 1, 2"]
         initial_regs = [0] * 32
-        initial_regs[1] = 0xffffffffffffa988 # -5678
-        initial_regs[2] = 0xffffffffffffedcc # -1234
+        initial_regs[1] = 0xffffffffffffa988  # -5678
+        initial_regs[2] = 0xffffffffffffedcc  # -1234
         self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_3_mullw(self):
@@ -111,8 +110,8 @@ class MulTestCase(FHDLTestCase):
         for i in range(40):
             lst = ["mullw 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_4_mullw_nonrand(self):
@@ -125,7 +124,7 @@ class MulTestCase(FHDLTestCase):
 
     def test_mulhw__regression_1(self):
         lst = ["mulhw. 3, 1, 2"
-              ]
+               ]
         initial_regs = [0] * 32
         initial_regs[1] = 0x7745b36eca6646fa
         initial_regs[2] = 0x47dfba3a63834ba2
@@ -135,8 +134,8 @@ class MulTestCase(FHDLTestCase):
         for i in range(40):
             lst = ["mullw 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rand_mul_lh(self):
@@ -145,8 +144,8 @@ class MulTestCase(FHDLTestCase):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rand_mullw(self):
@@ -155,8 +154,8 @@ class MulTestCase(FHDLTestCase):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rand_mulld(self):
@@ -165,8 +164,8 @@ class MulTestCase(FHDLTestCase):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rand_mulhd(self):
@@ -175,8 +174,8 @@ class MulTestCase(FHDLTestCase):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
-            initial_regs[2] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
+            initial_regs[2] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_ilang(self):
@@ -210,14 +209,15 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
                 sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
-                                test.mem, test.msr,
-                                bigendian=bigendian)
+                          test.mem, test.msr,
+                          bigendian=bigendian)
                 gen = program.generate_instructions()
                 instructions = list(zip(gen, program.assembly.splitlines()))
                 yield Settle()
@@ -232,7 +232,7 @@ class TestRunner(FHDLTestCase):
                         so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
                         ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
                         ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
-                        print ("before: so/ov/32", so, ov, ov32)
+                        print("before: so/ov/32", so, ov, ov32)
 
                     # ask the decoder to decode this binary data (endian'd)
                     yield pdecode2.dec.bigendian.eq(bigendian)  # little / big?
@@ -263,7 +263,7 @@ class TestRunner(FHDLTestCase):
 
         sim.add_sync_process(process)
         with sim.write_vcd("mul_simulator.vcd", "mul_simulator.gtkw",
-                            traces=[]):
+                           traces=[]):
             sim.run()
 
     def check_alu_outputs(self, alu, dec2, sim, code):
@@ -272,7 +272,7 @@ class TestRunner(FHDLTestCase):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
         if rc:
             self.assertEqual(cridx, 0, code)
 
diff --git a/src/soc/fu/regspec.py b/src/soc/fu/regspec.py
index 1524e8aa..07d07120 100644
--- a/src/soc/fu/regspec.py
+++ b/src/soc/fu/regspec.py
@@ -21,14 +21,14 @@ from soc.regfile.regfiles import XERRegs, FastRegs
 
 
 def get_regspec_bitwidth(regspec, srcdest, idx):
-    print ("get_regspec_bitwidth", regspec, srcdest, idx)
+    print("get_regspec_bitwidth", regspec, srcdest, idx)
     bitspec = regspec[srcdest][idx]
     wid = 0
-    print (bitspec)
+    print(bitspec)
     for ranges in bitspec[2].split(","):
         ranges = ranges.split(":")
-        print (ranges)
-        if len(ranges) == 1: # only one bit
+        print(ranges)
+        if len(ranges) == 1:  # only one bit
             wid += 1
         else:
             start, end = map(int, ranges)
@@ -89,18 +89,18 @@ class RegSpecALUAPI(RegSpecAPI):
         self.alu = alu
 
     def get_out(self, i):
-        if isinstance(self.rwid, int): # old - testing - API (rwid is int)
+        if isinstance(self.rwid, int):  # old - testing - API (rwid is int)
             return self.alu.out[i]
         # regspec-based API: look up variable through regspec thru row number
         return getattr(self.alu.n.data_o, self.get_out_name(i))
 
     def get_in(self, i):
-        if isinstance(self.rwid, int): # old - testing - API (rwid is int)
+        if isinstance(self.rwid, int):  # old - testing - API (rwid is int)
             return self.alu.i[i]
         # regspec-based API: look up variable through regspec thru row number
         return getattr(self.alu.p.data_i, self.get_in_name(i))
 
     def get_op(self):
-        if isinstance(self.rwid, int): # old - testing - API (rwid is int)
+        if isinstance(self.rwid, int):  # old - testing - API (rwid is int)
             return self.alu.op
         return self.alu.p.data_i.ctx.op
diff --git a/src/soc/fu/shift_rot/rotator.py b/src/soc/fu/shift_rot/rotator.py
index 7ea9b0da..a2888a3c 100644
--- a/src/soc/fu/shift_rot/rotator.py
+++ b/src/soc/fu/shift_rot/rotator.py
@@ -13,12 +13,13 @@ from nmigen.back.pysim import Settle
 def right_mask(m, mask_begin):
     ret = Signal(64, name="right_mask", reset_less=True)
     with m.If(mask_begin <= 64):
-        m.d.comb += ret.eq((1<<(64-mask_begin)) - 1)
+        m.d.comb += ret.eq((1 << (64-mask_begin)) - 1)
     return ret
 
+
 def left_mask(m, mask_end):
     ret = Signal(64, name="left_mask", reset_less=True)
-    m.d.comb += ret.eq(~((1<<(63-mask_end)) - 1))
+    m.d.comb += ret.eq(~((1 << (63-mask_end)) - 1))
     return ret
 
 
@@ -40,11 +41,13 @@ class Rotator(Elaboratable):
         * clear_left = 1 when insn_type is OP_RLC or OP_RLCL
         * clear_right = 1 when insn_type is OP_RLC or OP_RLCR
     """
+
     def __init__(self):
         # input
         self.me = Signal(5, reset_less=True)        # ME field
         self.mb = Signal(5, reset_less=True)        # MB field
-        self.mb_extra = Signal(1, reset_less=True)  # extra bit of mb in MD-form
+        # extra bit of mb in MD-form
+        self.mb_extra = Signal(1, reset_less=True)
         self.ra = Signal(64, reset_less=True)       # RA
         self.rs = Signal(64, reset_less=True)       # RS
         self.shift = Signal(7, reset_less=True)     # RB[0:7]
@@ -162,6 +165,7 @@ class Rotator(Elaboratable):
 
         return m
 
+
 if __name__ == '__main__':
 
     m = Module()
@@ -175,8 +179,7 @@ if __name__ == '__main__':
             yield mb.eq(63-i)
             yield Settle()
             res = yield mr
-            print (i, hex(res))
+            print(i, hex(res))
 
     run_simulation(m, [loop()],
                    vcd_name="test_mask.vcd")
-
diff --git a/src/soc/fu/shift_rot/test/test_pipe_caller.py b/src/soc/fu/shift_rot/test/test_pipe_caller.py
index 4abdfd12..574b1a7e 100644
--- a/src/soc/fu/shift_rot/test/test_pipe_caller.py
+++ b/src/soc/fu/shift_rot/test/test_pipe_caller.py
@@ -1,3 +1,19 @@
+import random
+from soc.fu.shift_rot.pipe_data import ShiftRotPipeSpec
+from soc.fu.alu.alu_input_record import CompALUOpSubset
+from soc.fu.shift_rot.pipeline import ShiftRotBasePipe
+from soc.fu.test.common import TestCase, ALUHelpers
+from soc.config.endian import bigendian
+from soc.decoder.isa.all import ISA
+from soc.simulator.program import Program
+from soc.decoder.selectable_int import SelectableInt
+from soc.decoder.power_enums import (XER_bits, Function, CryIn)
+from soc.decoder.power_decoder2 import (PowerDecode2)
+from soc.decoder.power_decoder import (create_pdecode)
+from soc.decoder.isa.caller import ISACaller, special_sprs
+import unittest
+from nmigen.cli import rtlil
+from nmutil.formaltest import FHDLTestCase
 from nmigen import Module, Signal
 from nmigen.back.pysim import Delay, Settle
 # NOTE: to use this (set to True), at present it is necessary to check
@@ -13,36 +29,18 @@ if cxxsim:
 else:
     from nmigen.back.pysim import Simulator
 
-from nmutil.formaltest import FHDLTestCase
-from nmigen.cli import rtlil
-import unittest
-from soc.decoder.isa.caller import ISACaller, special_sprs
-from soc.decoder.power_decoder import (create_pdecode)
-from soc.decoder.power_decoder2 import (PowerDecode2)
-from soc.decoder.power_enums import (XER_bits, Function, CryIn)
-from soc.decoder.selectable_int import SelectableInt
-from soc.simulator.program import Program
-from soc.decoder.isa.all import ISA
-from soc.config.endian import bigendian
-
-from soc.fu.test.common import TestCase, ALUHelpers
-from soc.fu.shift_rot.pipeline import ShiftRotBasePipe
-from soc.fu.alu.alu_input_record import CompALUOpSubset
-from soc.fu.shift_rot.pipe_data import ShiftRotPipeSpec
-import random
-
 
 def get_cu_inputs(dec2, sim):
     """naming (res) must conform to ShiftRotFunctionUnit input regspec
     """
     res = {}
 
-    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
-    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
-    yield from ALUHelpers.get_sim_int_rc(res, sim, dec2) # RC
-    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2)  # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2)  # RB
+    yield from ALUHelpers.get_sim_int_rc(res, sim, dec2)  # RC
+    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2)  # XER.ca
 
-    print ("inputs", res)
+    print("inputs", res)
 
     return res
 
@@ -80,6 +78,7 @@ def set_alu_inputs(alu, dec2, sim):
 
 class ShiftRotTestCase(FHDLTestCase):
     test_data = []
+
     def __init__(self, name):
         super().__init__(name)
         self.test_name = name
@@ -94,7 +93,7 @@ class ShiftRotTestCase(FHDLTestCase):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             initial_regs[2] = random.randint(0, 63)
             print(initial_regs[1], initial_regs[2])
             self.run_tst_program(Program(lst, bigendian), initial_regs)
@@ -102,7 +101,7 @@ class ShiftRotTestCase(FHDLTestCase):
     def test_shift_arith(self):
         lst = ["sraw 3, 1, 2"]
         initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[1] = random.randint(0, (1 << 64)-1)
         initial_regs[2] = random.randint(0, 63)
         print(initial_regs[1], initial_regs[2])
         self.run_tst_program(Program(lst, bigendian), initial_regs)
@@ -118,14 +117,14 @@ class ShiftRotTestCase(FHDLTestCase):
 
     def test_rlwinm(self):
         for i in range(10):
-            mb = random.randint(0,31)
-            me = random.randint(0,31)
-            sh = random.randint(0,31)
+            mb = random.randint(0, 31)
+            me = random.randint(0, 31)
+            sh = random.randint(0, 31)
             lst = [f"rlwinm 3, 1, {mb}, {me}, {sh}",
                    #f"rlwinm. 3, 1, {mb}, {me}, {sh}"
                    ]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rlwimi(self):
@@ -138,20 +137,20 @@ class ShiftRotTestCase(FHDLTestCase):
     def test_rlwnm(self):
         lst = ["rlwnm 3, 1, 2, 20, 6"]
         initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[1] = random.randint(0, (1 << 64)-1)
         initial_regs[2] = random.randint(0, 63)
         self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rldicl(self):
         lst = ["rldicl 3, 1, 5, 20"]
         initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[1] = random.randint(0, (1 << 64)-1)
         self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rldicr(self):
         lst = ["rldicr 3, 1, 5, 20"]
         initial_regs = [0] * 32
-        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[1] = random.randint(0, (1 << 64)-1)
         self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_regression_extswsli(self):
@@ -181,7 +180,7 @@ class ShiftRotTestCase(FHDLTestCase):
             sh = random.randint(0, 63)
             lst = [f"extswsli 3, 1, {sh}"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_rlc(self):
@@ -192,7 +191,7 @@ class ShiftRotTestCase(FHDLTestCase):
             m = random.randint(0, 63)
             lst = [f"{choice} 3, 1, {sh}, {m}"]
             initial_regs = [0] * 32
-            initial_regs[1] = random.randint(0, (1<<64)-1)
+            initial_regs[1] = random.randint(0, (1 << 64)-1)
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
     def test_ilang(self):
@@ -227,6 +226,7 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print(test.name)
@@ -265,16 +265,16 @@ class TestRunner(FHDLTestCase):
                     alu_out = yield alu.n.data_o.o.data
 
                     yield from self.check_alu_outputs(alu, pdecode2,
-                                                            simulator, code)
+                                                      simulator, code)
                     break
 
         sim.add_sync_process(process)
-        print (dir(sim))
+        print(dir(sim))
         if cxxsim:
             sim.run()
         else:
             with sim.write_vcd("simulator.vcd", "simulator.gtkw",
-                                traces=[]):
+                               traces=[]):
                 sim.run()
 
     def check_alu_outputs(self, alu, dec2, sim, code):
@@ -283,7 +283,7 @@ class TestRunner(FHDLTestCase):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
         if rc:
             self.assertEqual(cridx, 0, code)
 
@@ -303,7 +303,6 @@ class TestRunner(FHDLTestCase):
         ALUHelpers.check_int_o(self, res, sim_o, code)
 
 
-
 if __name__ == "__main__":
     unittest.main(exit=False)
     suite = unittest.TestSuite()
diff --git a/src/soc/fu/spr/test/test_pipe_caller.py b/src/soc/fu/spr/test/test_pipe_caller.py
index e20c55e0..6164fdd1 100644
--- a/src/soc/fu/spr/test/test_pipe_caller.py
+++ b/src/soc/fu/spr/test/test_pipe_caller.py
@@ -25,20 +25,19 @@ def get_cu_inputs(dec2, sim):
     """
     res = {}
 
-    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
-    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
-    yield from ALUHelpers.get_sim_slow_spr1(res, sim, dec2) # FAST1
-    yield from ALUHelpers.get_sim_fast_spr1(res, sim, dec2) # FAST1
-    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2) # XER.ca
-    yield from ALUHelpers.get_sim_xer_ov(res, sim, dec2) # XER.ov
-    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2) # XER.so
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2)  # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2)  # RB
+    yield from ALUHelpers.get_sim_slow_spr1(res, sim, dec2)  # SPR1
+    yield from ALUHelpers.get_sim_fast_spr1(res, sim, dec2)  # FAST1
+    yield from ALUHelpers.get_rd_sim_xer_ca(res, sim, dec2)  # XER.ca
+    yield from ALUHelpers.get_sim_xer_ov(res, sim, dec2)  # XER.ov
+    yield from ALUHelpers.get_sim_xer_so(res, sim, dec2)  # XER.so
 
-    print ("spr get_cu_inputs", res)
+    print("spr get_cu_inputs", res)
 
     return res
 
 
-
 def set_alu_inputs(alu, dec2, sim):
     # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
     # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
@@ -82,26 +81,27 @@ class SPRTestCase(FHDLTestCase):
         self.test_name = name
 
     def run_tst_program(self, prog, initial_regs=None, initial_sprs=None,
-                                    initial_msr=0):
+                        initial_msr=0):
         tc = TestCase(prog, self.test_name, initial_regs, initial_sprs,
-                                            msr=initial_msr)
+                      msr=initial_msr)
         self.test_data.append(tc)
 
     def test_1_mfspr(self):
-        lst = ["mfspr 1, 26", # SRR0
+        lst = ["mfspr 1, 26",  # SRR0
                "mfspr 2, 27",  # SRR1
                "mfspr 3, 8",  # LR
-               "mfspr 4, 1",] # XER
+               "mfspr 4, 1", ]  # XER
         initial_regs = [0] * 32
         initial_sprs = {'SRR0': 0x12345678, 'SRR1': 0x5678, 'LR': 0x1234,
                         'XER': 0xe00c0000}
-        self.run_tst_program(Program(lst, bigendian), initial_regs, initial_sprs)
+        self.run_tst_program(Program(lst, bigendian),
+                             initial_regs, initial_sprs)
 
     def test_1_mtspr(self):
-        lst = ["mtspr 26, 1", # SRR0
-               "mtspr 27, 2", # SRR1
+        lst = ["mtspr 26, 1",  # SRR0
+               "mtspr 27, 2",  # SRR1
                "mtspr 1, 3",  # XER
-               "mtspr 9, 4",] # CTR
+               "mtspr 9, 4", ]  # CTR
         initial_regs = [0] * 32
         initial_regs[1] = 0x129518230011feed
         initial_regs[2] = 0x123518230011feed
@@ -113,14 +113,14 @@ class SPRTestCase(FHDLTestCase):
                              initial_regs, initial_sprs)
 
     def test_2_mtspr_mfspr(self):
-        lst = ["mtspr 26, 1", # SRR0
-               "mtspr 27, 2", # SRR1
+        lst = ["mtspr 26, 1",  # SRR0
+               "mtspr 27, 2",  # SRR1
                "mtspr 1, 3",  # XER
                "mtspr 9, 4",  # CTR
-               "mfspr 2, 26", # SRR0
-               "mfspr 3, 27", # and into reg 2
+               "mfspr 2, 26",  # SRR0
+               "mfspr 3, 27",  # SRR1
                "mfspr 4, 1",  # XER
-               "mfspr 5, 9",] # CTR
+               "mfspr 5, 9", ]  # CTR
         initial_regs = [0] * 32
         initial_regs[1] = 0x129518230011feed
         initial_regs[2] = 0x123518230011feed
@@ -133,10 +133,10 @@ class SPRTestCase(FHDLTestCase):
 
     @unittest.skip("spr does not have TRAP in it. has to be done another way")
     def test_3_mtspr_priv(self):
-        lst = ["mtspr 26, 1", # SRR0
-               "mtspr 27, 2", # SRR1
+        lst = ["mtspr 26, 1",  # SRR0
+               "mtspr 27, 2",  # SRR1
                "mtspr 1, 3",  # XER
-               "mtspr 9, 4",] # CTR
+               "mtspr 9, 4", ]  # CTR
         initial_regs = [0] * 32
         initial_regs[1] = 0x129518230011feed
         initial_regs[2] = 0x123518230011feed
@@ -144,7 +144,7 @@ class SPRTestCase(FHDLTestCase):
         initial_regs[4] = 0x1010101010101010
         initial_sprs = {'SRR0': 0x12345678, 'SRR1': 0x5678, 'LR': 0x1234,
                         'XER': 0x0}
-        msr = 1<<MSR.PR
+        msr = 1 << MSR.PR
         self.run_tst_program(Program(lst, bigendian),
                              initial_regs, initial_sprs, initial_msr=msr)
 
@@ -180,15 +180,16 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print("test", test.name)
-                print ("sprs", test.sprs)
+                print("sprs", test.sprs)
                 program = test.program
                 self.subTest(test.name)
                 sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
-                                test.mem, test.msr,
-                                bigendian=bigendian)
+                          test.mem, test.msr,
+                          bigendian=bigendian)
                 gen = program.generate_instructions()
                 instructions = list(zip(gen, program.assembly.splitlines()))
 
@@ -205,22 +206,22 @@ class TestRunner(FHDLTestCase):
                         so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
                         ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
                         ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
-                        print ("before: so/ov/32", so, ov, ov32)
+                        print("before: so/ov/32", so, ov, ov32)
 
                     # ask the decoder to decode this binary data (endian'd)
                     yield pdecode2.dec.bigendian.eq(bigendian)  # little / big?
-                    yield pdecode2.msr.eq(msr) # set MSR in pdecode2
-                    yield pdecode2.cia.eq(pc) # set PC in pdecode2
+                    yield pdecode2.msr.eq(msr)  # set MSR in pdecode2
+                    yield pdecode2.cia.eq(pc)  # set PC in pdecode2
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
 
                     fast_in = yield pdecode2.e.read_fast1.data
                     spr_in = yield pdecode2.e.read_spr1.data
-                    print ("dec2 spr/fast in", fast_in, spr_in)
+                    print("dec2 spr/fast in", fast_in, spr_in)
 
                     fast_out = yield pdecode2.e.write_fast1.data
                     spr_out = yield pdecode2.e.write_spr.data
-                    print ("dec2 spr/fast in", fast_out, spr_out)
+                    print("dec2 spr/fast in", fast_out, spr_out)
 
                     fn_unit = yield pdecode2.e.do.fn_unit
                     self.assertEqual(fn_unit, Function.SPR.value)
@@ -243,7 +244,7 @@ class TestRunner(FHDLTestCase):
 
         sim.add_sync_process(process)
         with sim.write_vcd("alu_simulator.vcd", "simulator.gtkw",
-                            traces=[]):
+                           traces=[]):
             sim.run()
 
     def check_alu_outputs(self, alu, dec2, sim, code):
@@ -252,7 +253,7 @@ class TestRunner(FHDLTestCase):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
         if rc:
             self.assertEqual(cridx, 0, code)
 
@@ -266,7 +267,7 @@ class TestRunner(FHDLTestCase):
         yield from ALUHelpers.get_xer_ca(res, alu, dec2)
         yield from ALUHelpers.get_xer_so(res, alu, dec2)
 
-        print ("output", res)
+        print("output", res)
 
         yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
         yield from ALUHelpers.get_wr_sim_xer_so(sim_o, sim, alu, dec2)
@@ -275,7 +276,7 @@ class TestRunner(FHDLTestCase):
         yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
         yield from ALUHelpers.get_wr_slow_spr1(sim_o, sim, dec2)
 
-        print ("sim output", sim_o)
+        print("sim output", sim_o)
 
         ALUHelpers.check_xer_ov(self, res, sim_o, code)
         ALUHelpers.check_xer_ca(self, res, sim_o, code)
diff --git a/src/soc/fu/test/common.py b/src/soc/fu/test/common.py
index a4b9f0e8..ce4f21fe 100644
--- a/src/soc/fu/test/common.py
+++ b/src/soc/fu/test/common.py
@@ -4,15 +4,15 @@ Bugreports:
 """
 
 from soc.decoder.power_enums import XER_bits, CryIn, spr_dict
-from soc.regfile.util import fast_reg_to_spr # HACK!
+from soc.regfile.util import fast_reg_to_spr  # HACK!
 from soc.regfile.regfiles import FastRegs
 
 
 class TestCase:
     def __init__(self, program, name, regs=None, sprs=None, cr=0, mem=None,
-                       msr=0,
-                       do_sim=True,
-                       extra_break_addr=None):
+                 msr=0,
+                 do_sim=True,
+                 extra_break_addr=None):
 
         self.program = program
         self.name = name
@@ -131,26 +131,26 @@ class ALUHelpers:
     def set_xer_ca(alu, dec2, inp):
         if 'xer_ca' in inp:
             yield alu.p.data_i.xer_ca.eq(inp['xer_ca'])
-            print ("extra inputs: CA/32", bin(inp['xer_ca']))
+            print("extra inputs: CA/32", bin(inp['xer_ca']))
 
     def set_xer_ov(alu, dec2, inp):
         if 'xer_ov' in inp:
             yield alu.p.data_i.xer_ov.eq(inp['xer_ov'])
-            print ("extra inputs: OV/32", bin(inp['xer_ov']))
+            print("extra inputs: OV/32", bin(inp['xer_ov']))
 
     def set_xer_so(alu, dec2, inp):
         if 'xer_so' in inp:
             so = inp['xer_so']
-            print ("extra inputs: so", so)
+            print("extra inputs: so", so)
             yield alu.p.data_i.xer_so.eq(so)
 
     def set_msr(alu, dec2, inp):
-        print ("TODO: deprecate set_msr")
+        print("TODO: deprecate set_msr")
         if 'msr' in inp:
             yield alu.p.data_i.msr.eq(inp['msr'])
 
     def set_cia(alu, dec2, inp):
-        print ("TODO: deprecate set_cia")
+        print("TODO: deprecate set_cia")
         if 'cia' in inp:
             yield alu.p.data_i.cia.eq(inp['cia'])
 
@@ -304,7 +304,7 @@ class ALUHelpers:
             res['spr1'] = sim.spr[spr_name].value
 
     def get_wr_sim_xer_ca(res, sim, dec2):
-        #if not (yield alu.n.data_o.xer_ca.ok):
+        # if not (yield alu.n.data_o.xer_ca.ok):
         #    return
         cry_out = yield dec2.e.do.output_carry
         xer_out = yield dec2.e.xer_out
@@ -317,7 +317,7 @@ class ALUHelpers:
         oe = yield dec2.e.do.oe.oe
         oe_ok = yield dec2.e.do.oe.ok
         xer_out = yield dec2.e.xer_out
-        print ("get_wr_sim_xer_ov", xer_out)
+        print("get_wr_sim_xer_ov", xer_out)
         if not (yield alu.n.data_o.xer_ov.ok):
             return
         if xer_out or (oe and oe_ok):
@@ -338,7 +338,7 @@ class ALUHelpers:
         oe = yield dec2.e.do.oe.oe
         oe_ok = yield dec2.e.do.oe.ok
         xer_in = yield dec2.e.xer_in
-        print ("get_sim_xer_ov", xer_in)
+        print("get_sim_xer_ov", xer_in)
         if xer_in or (oe and oe_ok):
             expected_ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
             expected_ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
@@ -404,27 +404,26 @@ class ALUHelpers:
         if 'cr_a' in res:
             cr_expected = sim_o['cr_a']
             cr_actual = res['cr_a']
-            print ("CR", cr_expected, cr_actual)
+            print("CR", cr_expected, cr_actual)
             dut.assertEqual(cr_expected, cr_actual, msg)
 
     def check_xer_ca(dut, res, sim_o, msg):
         if 'xer_ca' in res:
             ca_expected = sim_o['xer_ca']
             ca_actual = res['xer_ca']
-            print ("CA", ca_expected, ca_actual)
+            print("CA", ca_expected, ca_actual)
             dut.assertEqual(ca_expected, ca_actual, msg)
 
     def check_xer_ov(dut, res, sim_o, msg):
         if 'xer_ov' in res:
             ov_expected = sim_o['xer_ov']
             ov_actual = res['xer_ov']
-            print ("OV", ov_expected, ov_actual)
+            print("OV", ov_expected, ov_actual)
             dut.assertEqual(ov_expected, ov_actual, msg)
 
     def check_xer_so(dut, res, sim_o, msg):
         if 'xer_so' in res:
             so_expected = sim_o['xer_so']
             so_actual = res['xer_so']
-            print ("SO", so_expected, so_actual)
+            print("SO", so_expected, so_actual)
             dut.assertEqual(so_expected, so_actual, msg)
-
diff --git a/src/soc/fu/trap/test/test_pipe_caller.py b/src/soc/fu/trap/test/test_pipe_caller.py
index 15be93d1..82e86201 100644
--- a/src/soc/fu/trap/test/test_pipe_caller.py
+++ b/src/soc/fu/trap/test/test_pipe_caller.py
@@ -24,19 +24,18 @@ def get_cu_inputs(dec2, sim):
     """
     res = {}
 
-    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2) # RA
-    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2) # RB
-    yield from ALUHelpers.get_sim_fast_spr1(res, sim, dec2) # SPR1
-    yield from ALUHelpers.get_sim_fast_spr2(res, sim, dec2) # SPR2
-    ALUHelpers.get_sim_cia(res, sim, dec2) # PC
-    ALUHelpers.get_sim_msr(res, sim, dec2) # MSR
+    yield from ALUHelpers.get_sim_int_ra(res, sim, dec2)  # RA
+    yield from ALUHelpers.get_sim_int_rb(res, sim, dec2)  # RB
+    yield from ALUHelpers.get_sim_fast_spr1(res, sim, dec2)  # SPR1
+    yield from ALUHelpers.get_sim_fast_spr2(res, sim, dec2)  # SPR2
+    ALUHelpers.get_sim_cia(res, sim, dec2)  # PC
+    ALUHelpers.get_sim_msr(res, sim, dec2)  # MSR
 
-    print ("alu get_cu_inputs", res)
+    print("alu get_cu_inputs", res)
 
     return res
 
 
-
 def set_alu_inputs(alu, dec2, sim):
     # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
     # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
@@ -45,11 +44,11 @@ def set_alu_inputs(alu, dec2, sim):
     inp = yield from get_cu_inputs(dec2, sim)
     yield from ALUHelpers.set_int_ra(alu, dec2, inp)
     yield from ALUHelpers.set_int_rb(alu, dec2, inp)
-    yield from ALUHelpers.set_fast_spr1(alu, dec2, inp) # SPR1
-    yield from ALUHelpers.set_fast_spr2(alu, dec2, inp) # SPR1
+    yield from ALUHelpers.set_fast_spr1(alu, dec2, inp)  # SPR1
+    yield from ALUHelpers.set_fast_spr2(alu, dec2, inp)  # SPR2
 
-    #yield from ALUHelpers.set_cia(alu, dec2, inp)
-    #yield from ALUHelpers.set_msr(alu, dec2, inp)
+    # yield from ALUHelpers.set_cia(alu, dec2, inp)
+    # yield from ALUHelpers.set_msr(alu, dec2, inp)
     return inp
 
 # This test bench is a bit different than is usual. Initially when I
@@ -79,9 +78,9 @@ class TrapTestCase(FHDLTestCase):
         self.test_name = name
 
     def run_tst_program(self, prog, initial_regs=None, initial_sprs=None,
-                                    initial_msr=0):
+                        initial_msr=0):
         tc = TestCase(prog, self.test_name, initial_regs, initial_sprs,
-                                            msr=initial_msr)
+                      msr=initial_msr)
         self.test_data.append(tc)
 
     def test_1_rfid(self):
@@ -96,7 +95,7 @@ class TrapTestCase(FHDLTestCase):
         insns = ["twi", "tdi"]
         for i in range(2):
             choice = random.choice(insns)
-            lst = [f"{choice} 4, 1, %d" % i] # TO=4: trap equal
+            lst = [f"{choice} 4, 1, %d" % i]  # TO=4: trap equal
             initial_regs = [0] * 32
             initial_regs[1] = 1
             self.run_tst_program(Program(lst, bigendian), initial_regs)
@@ -105,13 +104,12 @@ class TrapTestCase(FHDLTestCase):
         insns = ["tw", "td"]
         for i in range(2):
             choice = insns[i]
-            lst = [f"{choice} 4, 1, 2"] # TO=4: trap equal
+            lst = [f"{choice} 4, 1, 2"]  # TO=4: trap equal
             initial_regs = [0] * 32
             initial_regs[1] = 1
             initial_regs[2] = 1
             self.run_tst_program(Program(lst, bigendian), initial_regs)
 
-
     def test_3_mtmsr_0(self):
         lst = ["mtmsr 1,0"]
         initial_regs = [0] * 32
@@ -140,15 +138,16 @@ class TrapTestCase(FHDLTestCase):
         lst = ["mtmsr 1,0"]
         initial_regs = [0] * 32
         initial_regs[1] = 0xffffffffffffffff
-        msr = 1 << MSR.PR # set in "problem state"
+        msr = 1 << MSR.PR  # set in "problem state"
         self.run_tst_program(Program(lst, bigendian), initial_regs,
-                                                      initial_msr=msr)
+                             initial_msr=msr)
+
     def test_7_rfid_priv_0(self):
         lst = ["rfid"]
         initial_regs = [0] * 32
         initial_regs[1] = 1
         initial_sprs = {'SRR0': 0x12345678, 'SRR1': 0x5678}
-        msr = 1 << MSR.PR # set in "problem state"
+        msr = 1 << MSR.PR  # set in "problem state"
         self.run_tst_program(Program(lst, bigendian),
                              initial_regs, initial_sprs,
                              initial_msr=msr)
@@ -158,20 +157,20 @@ class TrapTestCase(FHDLTestCase):
         initial_regs = [0] * 32
         msr = (~(1 << MSR.PR)) & 0xffffffffffffffff
         self.run_tst_program(Program(lst, bigendian), initial_regs,
-                                                      initial_msr=msr)
+                             initial_msr=msr)
 
     def test_9_mfmsr_priv(self):
         lst = ["mfmsr 1"]
         initial_regs = [0] * 32
-        msr = 1 << MSR.PR # set in "problem state"
+        msr = 1 << MSR.PR  # set in "problem state"
         self.run_tst_program(Program(lst, bigendian), initial_regs,
-                                                      initial_msr=msr)
+                             initial_msr=msr)
 
     def test_999_illegal(self):
         # ok, um this is a bit of a cheat: use an instruction we know
         # is not implemented by either ISACaller or the core
         lst = ["tbegin.",
-               "mtmsr 1,1"] # should not get executed
+               "mtmsr 1,1"]  # should not get executed
         initial_regs = [0] * 32
         self.run_tst_program(Program(lst, bigendian), initial_regs)
 
@@ -207,20 +206,21 @@ class TestRunner(FHDLTestCase):
         sim = Simulator(m)
 
         sim.add_clock(1e-6)
+
         def process():
             for test in self.test_data:
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
                 sim = ISA(pdecode2, test.regs, test.sprs, test.cr,
-                                test.mem, test.msr,
-                                bigendian=bigendian)
+                          test.mem, test.msr,
+                          bigendian=bigendian)
                 gen = program.generate_instructions()
                 instructions = list(zip(gen, program.assembly.splitlines()))
 
                 msr = sim.msr.value
                 pc = sim.pc.CIA.value
-                print ("starting msr, pc %08x, %08x"% (msr, pc))
+                print("starting msr, pc %08x, %08x" % (msr, pc))
                 index = pc//4
                 while index < len(instructions):
                     ins, code = instructions[index]
@@ -231,12 +231,12 @@ class TestRunner(FHDLTestCase):
                         so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
                         ov = 1 if sim.spr['XER'][XER_bits['OV']] else 0
                         ov32 = 1 if sim.spr['XER'][XER_bits['OV32']] else 0
-                        print ("before: so/ov/32", so, ov, ov32)
+                        print("before: so/ov/32", so, ov, ov32)
 
                     # ask the decoder to decode this binary data (endian'd)
                     yield pdecode2.dec.bigendian.eq(bigendian)  # little / big?
-                    yield pdecode2.msr.eq(msr) # set MSR in pdecode2
-                    yield pdecode2.cia.eq(pc) # set CIA in pdecode2 
+                    yield pdecode2.msr.eq(msr)  # set MSR in pdecode2
+                    yield pdecode2.cia.eq(pc)  # set CIA in pdecode2
                     yield instruction.eq(ins)          # raw binary instr.
                     yield Settle()
                     fn_unit = yield pdecode2.e.do.fn_unit
@@ -261,7 +261,7 @@ class TestRunner(FHDLTestCase):
 
         sim.add_sync_process(process)
         with sim.write_vcd("alu_simulator.vcd", "simulator.gtkw",
-                            traces=[]):
+                           traces=[]):
             sim.run()
 
     def check_alu_outputs(self, alu, dec2, sim, code):
@@ -270,7 +270,7 @@ class TestRunner(FHDLTestCase):
         cridx_ok = yield dec2.e.write_cr.ok
         cridx = yield dec2.e.write_cr.data
 
-        print ("check extra output", repr(code), cridx_ok, cridx)
+        print("check extra output", repr(code), cridx_ok, cridx)
         if rc:
             self.assertEqual(cridx, 0, code)
 
@@ -283,7 +283,7 @@ class TestRunner(FHDLTestCase):
         yield from ALUHelpers.get_nia(res, alu, dec2)
         yield from ALUHelpers.get_msr(res, alu, dec2)
 
-        print ("output", res)
+        print("output", res)
 
         yield from ALUHelpers.get_sim_int_o(sim_o, sim, dec2)
         yield from ALUHelpers.get_wr_fast_spr1(sim_o, sim, dec2)
@@ -291,7 +291,7 @@ class TestRunner(FHDLTestCase):
         ALUHelpers.get_sim_nia(sim_o, sim, dec2)
         ALUHelpers.get_sim_msr(sim_o, sim, dec2)
 
-        print ("sim output", sim_o)
+        print("sim output", sim_o)
 
         ALUHelpers.check_int_o(self, res, sim_o, code)
         ALUHelpers.check_fast_spr1(self, res, sim_o, code)
diff --git a/src/soc/minerva/units/loadstore.py b/src/soc/minerva/units/loadstore.py
index c9808bf1..ffd6fec4 100644
--- a/src/soc/minerva/units/loadstore.py
+++ b/src/soc/minerva/units/loadstore.py
@@ -14,12 +14,12 @@ class LoadStoreUnitInterface:
     def __init__(self, pspec):
         self.pspec = pspec
         self.dbus = Record(make_wb_layout(pspec))
-        print (self.dbus.sel.shape())
+        print(self.dbus.sel.shape())
         self.mask_wid = mask_wid = pspec.mask_wid
         self.addr_wid = addr_wid = pspec.addr_wid
         self.data_wid = data_wid = pspec.reg_wid
-        print ("loadstoreunit addr mask data", addr_wid, mask_wid, data_wid)
-        self.adr_lsbs = log2_int(mask_wid) # LSBs of addr covered by mask
+        print("loadstoreunit addr mask data", addr_wid, mask_wid, data_wid)
+        self.adr_lsbs = log2_int(mask_wid)  # LSBs of addr covered by mask
         badwid = addr_wid-self.adr_lsbs    # TODO: is this correct?
 
         # INPUTS
@@ -27,29 +27,30 @@ class LoadStoreUnitInterface:
         self.x_mask_i = Signal(mask_wid)    # Mask of which bytes to write
         self.x_ld_i = Signal()              # set to do a memory load
         self.x_st_i = Signal()              # set to do a memory store
-        self.x_st_data_i = Signal(data_wid) # The data to write when storing
+        self.x_st_data_i = Signal(data_wid)  # The data to write when storing
 
         self.x_stall_i = Signal()           # do nothing until low
         self.x_valid_i = Signal()           # Whether x pipeline stage is
-                                            # currently enabled (I
-                                            # think?). Set to 1 for #now
+        # currently enabled (I
+        # think?). Set to 1 for #now
         self.m_stall_i = Signal()           # do nothing until low
         self.m_valid_i = Signal()           # Whether m pipeline stage is
-                                            # currently enabled. Set
-                                            # to 1 for now
+        # currently enabled. Set
+        # to 1 for now
 
         # OUTPUTS
         self.x_busy_o = Signal()            # set when the memory is busy
         self.m_busy_o = Signal()            # set when the memory is busy
 
-        self.m_ld_data_o = Signal(data_wid) # Data returned from memory read
+        self.m_ld_data_o = Signal(data_wid)  # Data returned from memory read
         # Data validity is NOT indicated by m_valid_i or x_valid_i as
         # those are inputs. I believe it is valid on the next cycle
         # after raising m_load where busy is low
 
         self.m_load_err_o = Signal()      # if there was an error when loading
         self.m_store_err_o = Signal()     # if there was an error when storing
-        self.m_badaddr_o = Signal(badwid) # The address of the load/store error
+        # The address of the load/store error
+        self.m_badaddr_o = Signal(badwid)
 
     def __iter__(self):
         yield self.x_addr_i
@@ -87,7 +88,7 @@ class BareLoadStoreUnit(LoadStoreUnitInterface, Elaboratable):
                     self.m_ld_data_o.eq(self.dbus.dat_r)
                 ]
         with m.Elif((self.x_ld_i | self.x_st_i) &
-                     self.x_valid_i & ~self.x_stall_i):
+                    self.x_valid_i & ~self.x_stall_i):
             m.d.sync += [
                 self.dbus.cyc.eq(1),
                 self.dbus.stb.eq(1),
@@ -154,7 +155,7 @@ class CachedLoadStoreUnit(LoadStoreUnitInterface, Elaboratable):
                 const_bits = 30 - range_bits
                 return "{}{}".format("0" * const_bits, "-" * range_bits)
 
-            if dcache.base >= (1<<self.adr_lsbs):
+            if dcache.base >= (1 << self.adr_lsbs):
                 with m.Case(addr_below(dcache.base >> self.adr_lsbs)):
                     m.d.comb += x_dcache_select.eq(0)
             with m.Case(addr_below(dcache.limit >> self.adr_lsbs)):
diff --git a/src/soc/regfile/regfile.py b/src/soc/regfile/regfile.py
index 3e3c4538..a389132d 100644
--- a/src/soc/regfile/regfile.py
+++ b/src/soc/regfile/regfile.py
@@ -87,6 +87,7 @@ class Register(Elaboratable):
     def ports(self):
         res = list(self)
 
+
 def ortreereduce(tree, attr="data_o"):
     return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
 
@@ -96,6 +97,7 @@ class RegFileArray(Elaboratable):
         that has no "address" decoder, instead it has individual write-en
         and read-en signals (per port).
     """
+
     def __init__(self, width, depth):
         self.width = width
         self.depth = depth
@@ -236,7 +238,7 @@ def regfile_sim(dut, rp, wp):
     yield rp.raddr.eq(1)
     yield Settle()
     data = yield rp.data_o
-    print (data)
+    print(data)
     assert data == 2
     yield
 
@@ -247,57 +249,59 @@ def regfile_sim(dut, rp, wp):
     yield wp.data_i.eq(6)
     yield Settle()
     data = yield rp.data_o
-    print (data)
+    print(data)
     assert data == 6
     yield
     yield wp.wen.eq(0)
     yield rp.ren.eq(0)
     yield Settle()
     data = yield rp.data_o
-    print (data)
+    print(data)
     assert data == 0
     yield
     data = yield rp.data_o
-    print (data)
+    print(data)
+
 
 def regfile_array_sim(dut, rp1, rp2, wp, wp2):
-    print ("regfile_array_sim")
+    print("regfile_array_sim")
     yield wp.data_i.eq(2)
-    yield wp.wen.eq(1<<1)
+    yield wp.wen.eq(1 << 1)
     yield
     yield wp.wen.eq(0)
-    yield rp1.ren.eq(1<<1)
+    yield rp1.ren.eq(1 << 1)
     yield Settle()
     data = yield rp1.data_o
-    print (data)
+    print(data)
     assert data == 2
     yield
 
-    yield rp1.ren.eq(1<<5)
-    yield rp2.ren.eq(1<<1)
-    yield wp.wen.eq(1<<5)
+    yield rp1.ren.eq(1 << 5)
+    yield rp2.ren.eq(1 << 1)
+    yield wp.wen.eq(1 << 5)
     yield wp.data_i.eq(6)
     yield Settle()
     data = yield rp1.data_o
     assert data == 6
-    print (data)
+    print(data)
     yield
     yield wp.wen.eq(0)
     yield rp1.ren.eq(0)
     yield rp2.ren.eq(0)
     yield Settle()
     data1 = yield rp1.data_o
-    print (data1)
+    print(data1)
     assert data1 == 0
     data2 = yield rp2.data_o
-    print (data2)
+    print(data2)
     assert data2 == 0
 
     yield
     data = yield rp1.data_o
-    print (data)
+    print(data)
     assert data == 0
 
+
 def test_regfile():
     dut = RegFile(32, 8)
     rp = dut.read_port()
@@ -313,8 +317,8 @@ def test_regfile():
     rp2 = dut.read_port("read2")
     wp = dut.write_port("write")
     wp2 = dut.write_port("write2")
-    ports=dut.ports()
-    print ("ports", ports)
+    ports = dut.ports()
+    print("ports", ports)
     vl = rtlil.convert(dut, ports=ports)
     with open("test_regfile_array.il", "w") as f:
         f.write(vl)
@@ -322,5 +326,6 @@ def test_regfile():
     run_simulation(dut, regfile_array_sim(dut, rp1, rp2, wp, wp2),
                    vcd_name='test_regfile_array.vcd')
 
+
 if __name__ == '__main__':
     test_regfile()
diff --git a/src/soc/regfile/virtual_port.py b/src/soc/regfile/virtual_port.py
index 322efe8e..ec1ee2c3 100644
--- a/src/soc/regfile/virtual_port.py
+++ b/src/soc/regfile/virtual_port.py
@@ -26,11 +26,12 @@ class VirtualRegPort(RegFileArray):
 
         # "full" depth variant of the "external" port
         self.full_wr = RecordObject([("wen", n_regs),
-                                     ("data_i", bitwidth)], # *full* wid
+                                     ("data_i", bitwidth)],  # *full* wid
                                     name="full_wr")
         self.full_rd = RecordObject([("ren", n_regs),
-                                     ("data_o", bitwidth)], # *full* wid
+                                     ("data_o", bitwidth)],  # *full* wid
                                     name="full_rd")
+
     def elaborate(self, platform):
         m = super().elaborate(platform)
         comb = m.d.comb
@@ -43,10 +44,10 @@ class VirtualRegPort(RegFileArray):
         rfull = self.full_rd
 
         # wire up the enable signals and chain-accumulate the data
-        l = map(lambda port: port.data_o, rd_regs) # get port data(s)
-        le = map(lambda port: port.ren, rd_regs) # get port ren(s)
+        l = map(lambda port: port.data_o, rd_regs)  # get port data(s)
+        le = map(lambda port: port.ren, rd_regs)  # get port ren(s)
 
-        comb += rfull.data_o.eq(Cat(*l)) # we like Cat on lists
+        comb += rfull.data_o.eq(Cat(*l))  # we like Cat on lists
         comb += Cat(*le).eq(rfull.ren)
 
         # connect up full write port
@@ -54,7 +55,7 @@ class VirtualRegPort(RegFileArray):
 
         # wire up the enable signals from the large (full) port
         l = map(lambda port: port.data_i, wr_regs)
-        le = map(lambda port: port.wen, wr_regs) # get port wen(s)
+        le = map(lambda port: port.wen, wr_regs)  # get port wen(s)
 
         # get list of all data_i (and wens) and assign to them via Cat
         comb += Cat(*l).eq(wfull.data_i)
@@ -71,51 +72,51 @@ class VirtualRegPort(RegFileArray):
 def regfile_array_sim(dut, rp1, rp2, rp3, wp):
     # part-port write
     yield wp.data_i.eq(2)
-    yield wp.wen.eq(1<<1)
+    yield wp.wen.eq(1 << 1)
     yield
     yield wp.wen.eq(0)
     # part-port read
-    yield rp1.ren.eq(1<<1)
+    yield rp1.ren.eq(1 << 1)
     yield
     data = yield rp1.data_o
-    print (data)
+    print(data)
     assert data == 2
 
     # simultaneous read/write - should be pass-thru
-    yield rp1.ren.eq(1<<5)
-    yield rp2.ren.eq(1<<1)
-    yield wp.wen.eq(1<<5)
+    yield rp1.ren.eq(1 << 5)
+    yield rp2.ren.eq(1 << 1)
+    yield wp.wen.eq(1 << 5)
     yield wp.data_i.eq(6)
     yield
     yield wp.wen.eq(0)
     yield rp1.ren.eq(0)
     yield rp2.ren.eq(0)
     data1 = yield rp1.data_o
-    print (data1)
+    print(data1)
     assert data1 == 6, data1
     data2 = yield rp2.data_o
-    print (data2)
+    print(data2)
     assert data2 == 2, data2
     yield
     data = yield rp1.data_o
-    print (data)
+    print(data)
 
     # full port read (whole reg)
     yield dut.full_rd.ren.eq(0xff)
     yield
     yield dut.full_rd.ren.eq(0)
     data = yield dut.full_rd.data_o
-    print (hex(data))
+    print(hex(data))
 
     # full port read (part reg)
-    yield dut.full_rd.ren.eq(0x1<<5)
+    yield dut.full_rd.ren.eq(0x1 << 5)
     yield
     yield dut.full_rd.ren.eq(0)
     data = yield dut.full_rd.data_o
-    print (hex(data))
+    print(hex(data))
 
     # full port part-write (part masked reg)
-    yield dut.full_wr.wen.eq(0x1<<1)
+    yield dut.full_wr.wen.eq(0x1 << 1)
     yield dut.full_wr.data_i.eq(0xe0)
     yield
     yield dut.full_wr.wen.eq(0x0)
@@ -125,7 +126,7 @@ def regfile_array_sim(dut, rp1, rp2, rp3, wp):
     yield
     yield dut.full_rd.ren.eq(0)
     data = yield dut.full_rd.data_o
-    print (hex(data))
+    print(hex(data))
 
     # full port write
     yield dut.full_wr.wen.eq(0xff)
@@ -138,17 +139,17 @@ def regfile_array_sim(dut, rp1, rp2, rp3, wp):
     yield
     yield dut.full_rd.ren.eq(0)
     data = yield dut.full_rd.data_o
-    print (hex(data))
+    print(hex(data))
 
     # part write
     yield wp.data_i.eq(2)
-    yield wp.wen.eq(1<<1)
+    yield wp.wen.eq(1 << 1)
     yield
     yield wp.wen.eq(0)
-    yield rp1.ren.eq(1<<1)
+    yield rp1.ren.eq(1 << 1)
     yield
     data = yield rp1.data_o
-    print (hex(data))
+    print(hex(data))
     assert data == 2
 
     # full port read (whole reg)
@@ -156,16 +157,16 @@ def regfile_array_sim(dut, rp1, rp2, rp3, wp):
     yield
     yield dut.full_rd.ren.eq(0)
     data = yield dut.full_rd.data_o
-    print (hex(data))
+    print(hex(data))
 
     # simultaneous read/write: full-write, part-write, 3x part-read
-    yield rp1.ren.eq(1<<5)
-    yield rp2.ren.eq(1<<1)
-    yield rp3.ren.eq(1<<3)
-    yield wp.wen.eq(1<<3)
+    yield rp1.ren.eq(1 << 5)
+    yield rp2.ren.eq(1 << 1)
+    yield rp3.ren.eq(1 << 3)
+    yield wp.wen.eq(1 << 3)
     yield wp.data_i.eq(6)
-    yield dut.full_wr.wen.eq((1<<1) | (1<<5))
-    yield dut.full_wr.data_i.eq((0xa<<(1*4)) | (0x3<<(5*4)))
+    yield dut.full_wr.wen.eq((1 << 1) | (1 << 5))
+    yield dut.full_wr.data_i.eq((0xa << (1*4)) | (0x3 << (5*4)))
     yield
     yield dut.full_wr.wen.eq(0)
     yield wp.wen.eq(0)
@@ -173,13 +174,13 @@ def regfile_array_sim(dut, rp1, rp2, rp3, wp):
     yield rp2.ren.eq(0)
     yield rp3.ren.eq(0)
     data1 = yield rp1.data_o
-    print (hex(data1))
+    print(hex(data1))
     assert data1 == 0x3
     data2 = yield rp2.data_o
-    print (hex(data2))
+    print(hex(data2))
     assert data2 == 0xa
     data3 = yield rp3.data_o
-    print (hex(data3))
+    print(hex(data3))
     assert data3 == 0x6
 
 
@@ -190,8 +191,8 @@ def test_regfile():
     rp3 = dut.read_port("read3")
     wp = dut.write_port("write")
 
-    ports=dut.ports()
-    print ("ports", ports)
+    ports = dut.ports()
+    print("ports", ports)
     vl = rtlil.convert(dut, ports=ports)
     with open("test_virtualregfile.il", "w") as f:
         f.write(vl)
@@ -199,7 +200,6 @@ def test_regfile():
     run_simulation(dut, regfile_array_sim(dut, rp1, rp2, rp3, wp),
                    vcd_name='test_regfile_array.vcd')
 
+
 if __name__ == '__main__':
     test_regfile()
-
-
diff --git a/src/soc/scoreboard/addr_match.py b/src/soc/scoreboard/addr_match.py
index 1fa56524..eee28398 100644
--- a/src/soc/scoreboard/addr_match.py
+++ b/src/soc/scoreboard/addr_match.py
@@ -43,20 +43,21 @@ from nmutil.latch import latchregister, SRLatch
 class PartialAddrMatch(Elaboratable):
     """A partial address matcher
     """
+
     def __init__(self, n_adr, bitwid):
         self.n_adr = n_adr
         self.bitwid = bitwid
         # inputs
         self.addrs_i = Array(Signal(bitwid, name="addr") for i in range(n_adr))
-        #self.addr_we_i = Signal(n_adr, reset_less=True) # write-enable
-        self.addr_en_i = Signal(n_adr, reset_less=True) # address latched in
-        self.addr_rs_i = Signal(n_adr, reset_less=True) # address deactivated
+        # self.addr_we_i = Signal(n_adr, reset_less=True) # write-enable
+        self.addr_en_i = Signal(n_adr, reset_less=True)  # address latched in
+        self.addr_rs_i = Signal(n_adr, reset_less=True)  # address deactivated
 
         # output: a nomatch for each address plus individual nomatch signals
         self.addr_nomatch_o = Signal(n_adr, name="nomatch_o", reset_less=True)
         self.addr_nomatch_a_o = Array(Signal(n_adr, reset_less=True,
-                                             name="nomatch_array_o") \
-                                  for i in range(n_adr))
+                                             name="nomatch_array_o")
+                                      for i in range(n_adr))
 
     def elaborate(self, platform):
         m = Module()
@@ -69,8 +70,8 @@ class PartialAddrMatch(Elaboratable):
         # array of address-latches
         m.submodules.l = self.l = l = SRLatch(llen=self.n_adr, sync=False)
         self.adrs_r = adrs_r = Array(Signal(self.bitwid, reset_less=True,
-                                              name="a_r") \
-                                       for i in range(self.n_adr))
+                                            name="a_r")
+                                     for i in range(self.n_adr))
 
         # latch set/reset
         comb += l.s.eq(self.addr_en_i)
@@ -94,12 +95,12 @@ class PartialAddrMatch(Elaboratable):
 
     def is_match(self, i, j):
         if i == j:
-            return Const(0) # don't match against self!
+            return Const(0)  # don't match against self!
         return self.adrs_r[i] == self.adrs_r[j]
 
     def __iter__(self):
         yield from self.addrs_i
-        #yield self.addr_we_i
+        # yield self.addr_we_i
         yield self.addr_en_i
         yield from self.addr_nomatch_a_o
         yield self.addr_nomatch_o
@@ -131,11 +132,11 @@ class LenExpand(Elaboratable):
         self.lexp_o = Signal(self.llen(1), reset_less=True)
         if cover > 1:
             self.rexp_o = Signal(self.llen(cover), reset_less=True)
-        print ("LenExpand", bit_len, cover, self.lexp_o.shape())
+        print("LenExpand", bit_len, cover, self.lexp_o.shape())
 
     def llen(self, cover):
         cl = log2_int(self.cover)
-        return (cover<<(self.bit_len))+(cl<<self.bit_len)
+        return (cover << (self.bit_len))+(cl << self.bit_len)
 
     def elaborate(self, platform):
         m = Module()
@@ -144,21 +145,21 @@ class LenExpand(Elaboratable):
         # covers N bits
         llen = self.llen(1)
         # temp
-        binlen = Signal((1<<self.bit_len)+1, reset_less=True)
+        binlen = Signal((1 << self.bit_len)+1, reset_less=True)
         lexp_o = Signal(llen, reset_less=True)
         comb += binlen.eq((Const(1, self.bit_len+1) << (self.len_i)) - 1)
         comb += self.lexp_o.eq(binlen << self.addr_i)
         if self.cover == 1:
             return m
         l = []
-        print ("llen", llen)
+        print("llen", llen)
         for i in range(llen):
             l.append(Repl(self.lexp_o[i], self.cover))
         comb += self.rexp_o.eq(Cat(*l))
         return m
 
     def ports(self):
-        return [self.len_i, self.addr_i, self.lexp_o,]
+        return [self.len_i, self.addr_i, self.lexp_o, ]
 
 
 class TwinPartialAddrBitmap(PartialAddrMatch):
@@ -174,22 +175,23 @@ class TwinPartialAddrBitmap(PartialAddrMatch):
     are 1 apart is *guaranteed* to be a miss for those two addresses.
     therefore is_match specially takes that into account.
     """
+
     def __init__(self, n_adr, lsbwid, bitlen):
-        self.lsbwid = lsbwid # number of bits to turn into unary
+        self.lsbwid = lsbwid  # number of bits to turn into unary
         self.midlen = bitlen-lsbwid
         PartialAddrMatch.__init__(self, n_adr, self.midlen)
 
         # input: length of the LOAD/STORE
-        expwid = 1+self.lsbwid # XXX assume LD/ST no greater than 8
-        self.lexp_i = Array(Signal(1<<expwid, reset_less=True,
-                                  name="len") for i in range(n_adr))
+        expwid = 1+self.lsbwid  # XXX assume LD/ST no greater than 8
+        self.lexp_i = Array(Signal(1 << expwid, reset_less=True,
+                                   name="len") for i in range(n_adr))
         # input: full address
         self.faddrs_i = Array(Signal(bitlen, reset_less=True,
-                                      name="fadr") for i in range(n_adr))
+                                     name="fadr") for i in range(n_adr))
 
         # registers for expanded len
-        self.len_r = Array(Signal(expwid, reset_less=True, name="l_r") \
-                                       for i in range(self.n_adr))
+        self.len_r = Array(Signal(expwid, reset_less=True, name="l_r")
+                           for i in range(self.n_adr))
 
     def elaborate(self, platform):
         m = PartialAddrMatch.elaborate(self, platform)
@@ -211,17 +213,17 @@ class TwinPartialAddrBitmap(PartialAddrMatch):
     # TODO make this a module.  too much.
     def is_match(self, i, j):
         if i == j:
-            return Const(0) # don't match against self!
+            return Const(0)  # don't match against self!
         # we know that pairs have addr and addr+1 therefore it is
         # guaranteed that they will not match.
         if (i // 2) == (j // 2):
-            return Const(0) # don't match against twin, either.
+            return Const(0)  # don't match against twin, either.
 
         # the bitmask contains data for *two* cache lines (16 bytes).
         # however len==8 only covers *half* a cache line so we only
         # need to compare half the bits
-        expwid = 1<<self.lsbwid
-        #if i % 2 == 1 or j % 2 == 1: # XXX hmmm...
+        expwid = 1 << self.lsbwid
+        # if i % 2 == 1 or j % 2 == 1: # XXX hmmm...
         #   expwid >>= 1
 
         # straight compare: binary top bits of addr, *unary* compare on bottom
@@ -259,8 +261,9 @@ class PartialAddrBitmap(PartialAddrMatch):
     therefore, because this now covers two addresses, we need *two*
     comparisons per address *not* one.
     """
+
     def __init__(self, n_adr, lsbwid, bitlen):
-        self.lsbwid = lsbwid # number of bits to turn into unary
+        self.lsbwid = lsbwid  # number of bits to turn into unary
         self.midlen = bitlen-lsbwid
         PartialAddrMatch.__init__(self, n_adr, self.midlen)
 
@@ -269,18 +272,18 @@ class PartialAddrBitmap(PartialAddrMatch):
                                   name="len") for i in range(n_adr))
         # input: full address
         self.faddrs_i = Array(Signal(bitlen, reset_less=True,
-                                      name="fadr") for i in range(n_adr))
+                                     name="fadr") for i in range(n_adr))
 
         # intermediary: address + 1
         self.addr1s = Array(Signal(self.midlen, reset_less=True,
-                                      name="adr1") \
+                                   name="adr1")
                             for i in range(n_adr))
 
         # expanded lengths, needed in match
-        expwid = 1+self.lsbwid # XXX assume LD/ST no greater than 8
-        self.lexp = Array(Signal(1<<expwid, reset_less=True,
-                                name="a_l") \
-                                       for i in range(self.n_adr))
+        expwid = 1+self.lsbwid  # XXX assume LD/ST no greater than 8
+        self.lexp = Array(Signal(1 << expwid, reset_less=True,
+                                 name="a_l")
+                          for i in range(self.n_adr))
 
     def elaborate(self, platform):
         m = PartialAddrMatch.elaborate(self, platform)
@@ -289,8 +292,8 @@ class PartialAddrBitmap(PartialAddrMatch):
         # intermediaries
         adrs_r, l = self.adrs_r, self.l
         len_r = Array(Signal(self.lsbwid, reset_less=True,
-                                name="l_r") \
-                                       for i in range(self.n_adr))
+                             name="l_r")
+                      for i in range(self.n_adr))
 
         for i in range(self.n_adr):
             # create a bit-expander for each address
@@ -316,14 +319,14 @@ class PartialAddrBitmap(PartialAddrMatch):
     # TODO make this a module.  too much.
     def is_match(self, i, j):
         if i == j:
-            return Const(0) # don't match against self!
+            return Const(0)  # don't match against self!
         # the bitmask contains data for *two* cache lines (16 bytes).
         # however len==8 only covers *half* a cache line so we only
         # need to compare half the bits
-        expwid = 1<<self.lsbwid
+        expwid = 1 << self.lsbwid
         hexp = expwid >> 1
         expwid2 = expwid + hexp
-        print (self.lsbwid, expwid)
+        print(self.lsbwid, expwid)
         # straight compare: binary top bits of addr, *unary* compare on bottom
         straight_eq = (self.adrs_r[i] == self.adrs_r[j]) & \
                       (self.lexp[i][:expwid] & self.lexp[j][:expwid]).bool()
@@ -338,7 +341,7 @@ class PartialAddrBitmap(PartialAddrMatch):
     def __iter__(self):
         yield from self.faddrs_i
         yield from self.len_i
-        #yield self.addr_we_i
+        # yield self.addr_we_i
         yield self.addr_en_i
         yield from self.addr_nomatch_a_o
         yield self.addr_nomatch_o
@@ -368,6 +371,7 @@ def part_addr_sim(dut):
     yield dut.go_wr_i.eq(0)
     yield
 
+
 def part_addr_bit(dut):
     #                                    0b110 |               0b101 |
     # 0b101 1011 / 8 ==> 0b0000 0000 0000 0111 | 1111 1000 0000 0000 |
@@ -409,15 +413,15 @@ def part_addr_bit(dut):
 
 def part_addr_byte(dut):
     for l in range(8):
-        for a in range(1<<dut.bit_len):
-            maskbit = (1<<(l))-1
-            mask = (1<<(l*8))-1
+        for a in range(1 << dut.bit_len):
+            maskbit = (1 << (l))-1
+            mask = (1 << (l*8))-1
             yield dut.len_i.eq(l)
             yield dut.addr_i.eq(a)
             yield Settle()
             lexp = yield dut.lexp_o
             exp = yield dut.rexp_o
-            print ("pa", l, a, bin(lexp), hex(exp))
+            print("pa", l, a, bin(lexp), hex(exp))
             assert exp == (mask << (a*8))
             assert lexp == (maskbit << (a))
 
@@ -455,6 +459,7 @@ def test_part_addr():
 
     run_simulation(dut, part_addr_sim(dut), vcd_name='test_part_addr.vcd')
 
+
 if __name__ == '__main__':
     test_part_addr()
     test_lenexpand_byte()
diff --git a/src/soc/scoreboard/addr_split.py b/src/soc/scoreboard/addr_split.py
index 7ae3fcc5..99b03c74 100644
--- a/src/soc/scoreboard/addr_split.py
+++ b/src/soc/scoreboard/addr_split.py
@@ -59,8 +59,8 @@ class LDSTSplitter(Elaboratable):
 
     def __init__(self, dwidth, awidth, dlen):
         self.dwidth, self.awidth, self.dlen = dwidth, awidth, dlen
-        #cline_wid = 8<<dlen # cache line width: bytes (8) times (2^^dlen)
-        cline_wid = dwidth # TODO: make this bytes not bits
+        # cline_wid = 8<<dlen # cache line width: bytes (8) times (2^^dlen)
+        cline_wid = dwidth  # TODO: make this bytes not bits
         self.addr_i = Signal(awidth, reset_less=True)
         self.len_i = Signal(dlen, reset_less=True)
         self.valid_i = Signal(reset_less=True)
@@ -77,12 +77,12 @@ class LDSTSplitter(Elaboratable):
         self.sld_valid_o = Signal(2, reset_less=True)
         self.sld_valid_i = Signal(2, reset_less=True)
         self.sld_data_i = Array((LDData(cline_wid, "ld_data_i1"),
-                                LDData(cline_wid, "ld_data_i2")))
+                                 LDData(cline_wid, "ld_data_i2")))
 
         self.sst_valid_o = Signal(2, reset_less=True)
         self.sst_valid_i = Signal(2, reset_less=True)
         self.sst_data_o = Array((LDData(cline_wid, "st_data_i1"),
-                                LDData(cline_wid, "st_data_i2")))
+                                 LDData(cline_wid, "st_data_i2")))
 
     def elaborate(self, platform):
         m = Module()
@@ -99,14 +99,14 @@ class LDSTSplitter(Elaboratable):
         comb += lenexp.len_i.eq(self.len_i)
         mask1 = Signal(mlen, reset_less=True)
         mask2 = Signal(mlen, reset_less=True)
-        comb += mask1.eq(lenexp.lexp_o[0:mlen]) # Lo bits of expanded len-mask
+        comb += mask1.eq(lenexp.lexp_o[0:mlen])  # Lo bits of expanded len-mask
         comb += mask2.eq(lenexp.lexp_o[mlen:])  # Hi bits of expanded len-mask
 
         # set up new address records: addr1 is "as-is", addr2 is +1
         comb += ld1.addr_i.eq(self.addr_i[dlen:])
         ld2_value = self.addr_i[dlen:] + 1
         comb += ld2.addr_i.eq(ld2_value)
-        #exception if rolls
+        # exception if rolls
         with m.If(ld2_value[self.awidth-dlen]):
             comb += self.exc.eq(1)
 
@@ -114,7 +114,7 @@ class LDSTSplitter(Elaboratable):
         ashift1 = Signal(self.dlen, reset_less=True)
         ashift2 = Signal(self.dlen, reset_less=True)
         comb += ashift1.eq(self.addr_i[:self.dlen])
-        comb += ashift2.eq((1<<dlen)-ashift1)
+        comb += ashift2.eq((1 << dlen)-ashift1)
 
         with m.If(self.is_ld_i):
             # set up connections to LD-split.  note: not active if mask is zero
@@ -140,7 +140,7 @@ class LDSTSplitter(Elaboratable):
                 # note that data from LD1 will be in *cache-line* byte position
                 # likewise from LD2 but we *know* it is at the start of the line
                 comb += self.ld_data_o.data.eq((ld1.ld_o.data >> ashift1) |
-                                                (ld2.ld_o.data << ashift2))
+                                               (ld2.ld_o.data << ashift2))
 
         with m.If(self.is_st_i):
             for i, (ld, mask) in enumerate(((ld1, mask1),
@@ -183,33 +183,34 @@ class LDSTSplitter(Elaboratable):
     def ports(self):
         return list(self)
 
+
 def sim(dut):
 
     sim = Simulator(dut)
     sim.add_clock(1e-6)
     data = 0b11010011
-    dlen = 4 # 4 bits
+    dlen = 4  # 4 bits
     addr = 0b1100
     ld_len = 8
-    ldm = ((1<<ld_len)-1)
-    dlm = ((1<<dlen)-1)
-    data = data & ldm # truncate data to be tested, mask to within ld len
-    print ("ldm", ldm, bin(data&ldm))
-    print ("dlm", dlm, bin(addr&dlm))
+    ldm = ((1 << ld_len)-1)
+    dlm = ((1 << dlen)-1)
+    data = data & ldm  # truncate data to be tested, mask to within ld len
+    print("ldm", ldm, bin(data & ldm))
+    print("dlm", dlm, bin(addr & dlm))
     dmask = ldm << (addr & dlm)
-    print ("dmask", bin(dmask))
-    dmask1 = dmask >> (1<<dlen)
-    print ("dmask1", bin(dmask1))
-    dmask = dmask & ((1<<(1<<dlen))-1)
-    print ("dmask", bin(dmask))
+    print("dmask", bin(dmask))
+    dmask1 = dmask >> (1 << dlen)
+    print("dmask1", bin(dmask1))
+    dmask = dmask & ((1 << (1 << dlen))-1)
+    print("dmask", bin(dmask))
 
     def send_ld():
-        print ("send_ld")
+        print("send_ld")
         yield dut.is_ld_i.eq(1)
         yield dut.len_i.eq(ld_len)
         yield dut.addr_i.eq(addr)
         yield dut.valid_i.eq(1)
-        print ("waiting")
+        print("waiting")
         while True:
             valid_o = yield dut.valid_o
             if valid_o:
@@ -219,11 +220,11 @@ def sim(dut):
         yield dut.is_ld_i.eq(0)
         yield
 
-        print (bin(ld_data_o), bin(data))
+        print(bin(ld_data_o), bin(data))
         assert ld_data_o == data
 
     def lds():
-        print ("lds")
+        print("lds")
         while True:
             valid_i = yield dut.valid_i
             if valid_i:
@@ -233,10 +234,10 @@ def sim(dut):
         shf = addr & dlm
         shfdata = (data << shf)
         data1 = shfdata & dmask
-        print ("ld data1", bin(data), bin(data1), shf, bin(dmask))
+        print("ld data1", bin(data), bin(data1), shf, bin(dmask))
 
         data2 = (shfdata >> 16) & dmask1
-        print ("ld data2", 1<<dlen, bin(data >> (1<<dlen)), bin(data2))
+        print("ld data2", 1 << dlen, bin(data >> (1 << dlen)), bin(data2))
         yield dut.sld_data_i[0].data.eq(data1)
         yield dut.sld_valid_i[0].eq(1)
         yield
diff --git a/src/soc/scoreboard/instruction_q.py b/src/soc/scoreboard/instruction_q.py
index fdfdc297..4dec3cf2 100644
--- a/src/soc/scoreboard/instruction_q.py
+++ b/src/soc/scoreboard/instruction_q.py
@@ -8,6 +8,7 @@ from nmutil.nmoperator import eq, shape, cat
 
 from soc.decoder.power_decoder2 import Decode2ToExecute1Type
 
+
 class Instruction(Decode2ToExecute1Type):
 
     @staticmethod
@@ -27,6 +28,7 @@ class InstructionQ(Elaboratable):
 
         input and shifting occurs on sync.
     """
+
     def __init__(self, wid, opwid, iqlen, n_in, n_out):
         """ constructor
 
@@ -44,13 +46,13 @@ class InstructionQ(Elaboratable):
         self.n_out = n_out
         mqbits = (int(log(iqlen) / log(2))+2, False)
 
-        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
-        self.p_ready_o = Signal() # instructions were added
+        self.p_add_i = Signal(mqbits)  # instructions to add (from data_i)
+        self.p_ready_o = Signal()  # instructions were added
         self.data_i = Instruction._nq(n_in, "data_i")
-        
+
         self.data_o = Instruction._nq(n_out, "data_o")
-        self.n_sub_i = Signal(mqbits) # number of instructions to remove
-        self.n_sub_o = Signal(mqbits) # number of instructions removed
+        self.n_sub_i = Signal(mqbits)  # number of instructions to remove
+        self.n_sub_o = Signal(mqbits)  # number of instructions removed
 
         self.qsz = shape(self.data_o[0])[0]
         q = []
@@ -74,7 +76,7 @@ class InstructionQ(Elaboratable):
         start_q = Signal(mqbits)
         end_q = Signal(mqbits)
         mqlen = Const(iqlen, (len(left), False))
-        print ("mqlen", mqlen)
+        print("mqlen", mqlen)
 
         # work out how many can be subtracted from the queue
         with m.If(self.n_sub_i):
@@ -86,7 +88,7 @@ class InstructionQ(Elaboratable):
                 comb += self.n_sub_o.eq(self.n_sub_i)
 
         # work out how many new items are going to be in the queue
-        comb += left.eq(self.qlen_o )#- self.n_sub_o)
+        comb += left.eq(self.qlen_o)  # - self.n_sub_o)
         comb += spare.eq(mqlen - self.p_add_i)
         comb += qmaxed.eq(left <= spare)
         comb += self.p_ready_o.eq(qmaxed & (self.p_add_i != 0))
@@ -106,7 +108,7 @@ class InstructionQ(Elaboratable):
             for i in range(self.n_in):
                 with m.If(self.p_add_i > Const(i, len(self.p_add_i))):
                     ipos = Signal(mqbits)
-                    comb += ipos.eq(start_q + i) # should roll round
+                    comb += ipos.eq(start_q + i)  # should roll round
                     sync += self.q[ipos].eq(cat(self.data_i[i]))
             sync += start_q.eq(start_q + self.p_add_i)
 
@@ -127,7 +129,7 @@ class InstructionQ(Elaboratable):
         for o in self.data_i:
             yield from list(o)
         yield self.p_add_i
-        
+
         for o in self.data_o:
             yield from list(o)
         yield self.n_sub_i
@@ -159,6 +161,7 @@ def instruction_q_sim(dut):
     yield dut.go_wr_i.eq(0)
     yield
 
+
 def test_instruction_q():
     dut = InstructionQ(16, 4, 4, n_in=2, n_out=2)
     vl = rtlil.convert(dut, ports=dut.ports())
@@ -168,5 +171,6 @@ def test_instruction_q():
     run_simulation(dut, instruction_q_sim(dut),
                    vcd_name='test_instruction_q.vcd')
 
+
 if __name__ == '__main__':
     test_instruction_q()
diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py
index f03ffab8..f6e9a73b 100644
--- a/src/soc/simple/core.py
+++ b/src/soc/simple/core.py
@@ -30,7 +30,7 @@ from soc.regfile.regfiles import RegFiles
 from soc.decoder.power_decoder import create_pdecode
 from soc.decoder.power_decoder2 import PowerDecode2
 from soc.decoder.decode2execute1 import Data
-from soc.experiment.l0_cache import TstL0CacheBuffer # test only
+from soc.experiment.l0_cache import TstL0CacheBuffer  # test only
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.decoder.power_enums import MicrOp
 import operator
@@ -41,6 +41,7 @@ import operator
 def ortreereduce(tree, attr="data_o"):
     return treereduce(tree, operator.or_, lambda x: getattr(x, attr))
 
+
 def ortreereduce_sig(tree):
     return treereduce(tree, operator.or_, lambda x: x)
 
@@ -54,7 +55,7 @@ def sort_fuspecs(fuspecs):
     for (regname, fspec) in fuspecs.items():
         if not regname.startswith("full"):
             res.append((regname, fspec))
-    return res # enumerate(res)
+    return res  # enumerate(res)
 
 
 class NonProductionCore(Elaboratable):
@@ -85,7 +86,7 @@ class NonProductionCore(Elaboratable):
         # start/stop and terminated signalling
         self.core_start_i = Signal(reset_less=True)
         self.core_stop_i = Signal(reset_less=True)
-        self.core_terminated_o = Signal(reset=0) # indicates stopped
+        self.core_terminated_o = Signal(reset=0)  # indicates stopped
 
     def elaborate(self, platform):
         m = Module()
@@ -98,7 +99,7 @@ class NonProductionCore(Elaboratable):
         fus = self.fus.fus
 
         # core start/stopped state
-        core_stopped = Signal(reset=1) # begins in stopped state
+        core_stopped = Signal(reset=1)  # begins in stopped state
 
         # start/stop signalling
         with m.If(self.core_start_i):
@@ -156,7 +157,7 @@ class NonProductionCore(Elaboratable):
 
         with m.If(can_run):
             with m.Switch(dec2.e.do.insn_type):
-            # check for ATTN: halt if true
+                # check for ATTN: halt if true
                 with m.Case(MicrOp.OP_ATTN):
                     m.d.sync += core_stopped.eq(1)
 
@@ -203,7 +204,7 @@ class NonProductionCore(Elaboratable):
 
             # for each named regfile port, connect up all FUs to that port
             for (regname, fspec) in sort_fuspecs(fuspecs):
-                print ("connect rd", regname, fspec)
+                print("connect rd", regname, fspec)
                 rpidx = regname
                 # get the regfile specs for this regfile port
                 (rf, read, write, wid, fuspec) = fspec
@@ -212,12 +213,14 @@ class NonProductionCore(Elaboratable):
                 comb += rdflag.eq(rf)
 
                 # select the required read port.  these are pre-defined sizes
-                print (rpidx, regfile, regs.rf.keys())
+                print(rpidx, regfile, regs.rf.keys())
                 rport = regs.rf[regfile.lower()].r_ports[rpidx]
 
                 # create a priority picker to manage this port
-                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(len(fuspec))
-                setattr(m.submodules, "rdpick_%s_%s" % (regfile, rpidx), rdpick)
+                rdpickers[regfile][rpidx] = rdpick = PriorityPicker(
+                    len(fuspec))
+                setattr(m.submodules, "rdpick_%s_%s" %
+                        (regfile, rpidx), rdpick)
 
                 # connect the regspec "reg select" number to this port
                 with m.If(rdpick.en_o):
@@ -235,10 +238,11 @@ class NonProductionCore(Elaboratable):
                     comb += fu.go_rd_i[idx].eq(rdpick.o[pi])
 
                     # connect regfile port to input, creating a Broadcast Bus
-                    print ("reg connect widths",
-                           regfile, regname, pi, funame,
-                           src.shape(), rport.data_o.shape())
-                    comb += src.eq(rport.data_o) # all FUs connect to same port
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          src.shape(), rport.data_o.shape())
+                    # all FUs connect to same port
+                    comb += src.eq(rport.data_o)
 
     def connect_wrports(self, m, fu_bitdict):
         """connect write ports
@@ -264,18 +268,20 @@ class NonProductionCore(Elaboratable):
             fuspecs = byregfiles_wrspec[regfile]
             wrpickers[regfile] = {}
             for (regname, fspec) in sort_fuspecs(fuspecs):
-                print ("connect wr", regname, fspec)
+                print("connect wr", regname, fspec)
                 rpidx = regname
                 # get the regfile specs for this regfile port
                 (rf, read, write, wid, fuspec) = fspec
 
                 # select the required write port.  these are pre-defined sizes
-                print (regfile, regs.rf.keys())
+                print(regfile, regs.rf.keys())
                 wport = regs.rf[regfile.lower()].w_ports[rpidx]
 
                 # create a priority picker to manage this port
-                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(len(fuspec))
-                setattr(m.submodules, "wrpick_%s_%s" % (regfile, rpidx), wrpick)
+                wrpickers[regfile][rpidx] = wrpick = PriorityPicker(
+                    len(fuspec))
+                setattr(m.submodules, "wrpick_%s_%s" %
+                        (regfile, rpidx), wrpick)
 
                 # connect the regspec write "reg select" number to this port
                 # only if one FU actually requests (and is granted) the port
@@ -291,20 +297,20 @@ class NonProductionCore(Elaboratable):
                 for pi, (funame, fu, idx) in enumerate(fuspec):
                     # write-request comes from dest.ok
                     dest = fu.get_out(idx)
-                    fu_dest_latch = fu.get_fu_out(idx) # latched output
+                    fu_dest_latch = fu.get_fu_out(idx)  # latched output
                     name = "wrflag_%s_%s_%d" % (funame, regname, idx)
                     wrflag = Signal(name=name, reset_less=True)
                     comb += wrflag.eq(dest.ok & fu.busy_o)
 
                     # connect request-read to picker input, and output to go-wr
                     fu_active = fu_bitdict[funame]
-                    pick = fu.wr.rel[idx] & fu_active #& wrflag
+                    pick = fu.wr.rel[idx] & fu_active  # & wrflag
                     comb += wrpick.i[pi].eq(pick)
                     comb += fu.go_wr_i[idx].eq(wrpick.o[pi] & wrpick.en_o)
                     # connect regfile port to input
-                    print ("reg connect widths",
-                           regfile, regname, pi, funame,
-                           dest.shape(), wport.data_i.shape())
+                    print("reg connect widths",
+                          regfile, regname, pi, funame,
+                          dest.shape(), wport.data_i.shape())
                     wsigs.append(fu_dest_latch)
 
                 # here is where we create the Write Broadcast Bus. simple, eh?
@@ -321,13 +327,13 @@ class NonProductionCore(Elaboratable):
         byregfiles = {}
         byregfiles_spec = {}
         for (funame, fu) in fus.items():
-            print ("%s ports for %s" % (mode, funame))
+            print("%s ports for %s" % (mode, funame))
             for idx in range(fu.n_src if readmode else fu.n_dst):
                 if readmode:
                     (regfile, regname, wid) = fu.get_in_spec(idx)
                 else:
                     (regfile, regname, wid) = fu.get_out_spec(idx)
-                print ("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
+                print("    %d %s %s %s" % (idx, regfile, regname, str(wid)))
                 if readmode:
                     rdflag, read = dec2.regspecmap_read(regfile, regname)
                     write = None
@@ -339,7 +345,7 @@ class NonProductionCore(Elaboratable):
                     byregfiles_spec[regfile] = {}
                 if regname not in byregfiles_spec[regfile]:
                     byregfiles_spec[regfile][regname] = \
-                                [rdflag, read, write, wid, []]
+                        [rdflag, read, write, wid, []]
                 # here we start to create "lanes"
                 if idx not in byregfiles[regfile]:
                     byregfiles[regfile][idx] = []
@@ -349,16 +355,16 @@ class NonProductionCore(Elaboratable):
 
         # ok just print that out, for convenience
         for regfile, spec in byregfiles.items():
-            print ("regfile %s ports:" % mode, regfile)
+            print("regfile %s ports:" % mode, regfile)
             fuspecs = byregfiles_spec[regfile]
             for regname, fspec in fuspecs.items():
                 [rdflag, read, write, wid, fuspec] = fspec
-                print ("  rf %s port %s lane: %s" % (mode, regfile, regname))
-                print ("  %s" % regname, wid, read, write, rdflag)
+                print("  rf %s port %s lane: %s" % (mode, regfile, regname))
+                print("  %s" % regname, wid, read, write, rdflag)
                 for (funame, fu, idx) in fuspec:
                     fusig = fu.src_i[idx] if readmode else fu.dest[idx]
-                    print ("    ", funame, fu, idx, fusig)
-                    print ()
+                    print("    ", funame, fu, idx, fusig)
+                    print()
 
         return byregfiles, byregfiles_spec
 
diff --git a/src/soc/simple/test/test_core.py b/src/soc/simple/test/test_core.py
index 2c70cc73..b2c219da 100644
--- a/src/soc/simple/test/test_core.py
+++ b/src/soc/simple/test/test_core.py
@@ -19,7 +19,7 @@ from soc.config.test.test_loadstore import TestMemPspec
 from soc.config.endian import bigendian
 
 from soc.simple.core import NonProductionCore
-from soc.experiment.compalu_multi import find_ok # hack
+from soc.experiment.compalu_multi import find_ok  # hack
 
 from soc.fu.compunits.test.test_compunit import (setup_test_memory,
                                                  check_sim_memory)
@@ -45,18 +45,18 @@ def setup_regs(core, test):
     cr = test.cr
     crregs = core.regs.cr
     #cr = int('{:32b}'.format(cr)[::-1], 2)
-    print ("cr reg", hex(cr))
+    print("cr reg", hex(cr))
     for i in range(8):
         #j = 7-i
-        cri = (cr>>(i*4)) & 0xf
+        cri = (cr >> (i*4)) & 0xf
         #cri = int('{:04b}'.format(cri)[::-1], 2)
-        print ("cr reg", hex(cri), i,
-                crregs.regs[i].reg.shape())
+        print("cr reg", hex(cri), i,
+              crregs.regs[i].reg.shape())
         yield crregs.regs[i].reg.eq(cri)
 
     # set up XER.  "direct" write (bypass rd/write ports)
     xregs = core.regs.xer
-    print ("sprs", test.sprs)
+    print("sprs", test.sprs)
     xer = None
     if 'XER' in test.sprs:
         xer = test.sprs['XER']
@@ -73,8 +73,8 @@ def setup_regs(core, test):
         ovbit = xer[XER_bits['OV']].value
         ov32bit = xer[XER_bits['OV32']].value
         yield xregs.regs[xregs.OV].reg.eq(Cat(ovbit, ov32bit))
-        print ("setting XER so %d ca %d ca32 %d ov %d ov32 %d" % \
-                (sobit, cabit, ca32bit, ovbit, ov32bit))
+        print("setting XER so %d ca %d ca32 %d ov %d ov32 %d" %
+              (sobit, cabit, ca32bit, ovbit, ov32bit))
     else:
         yield xregs.regs[xregs.SO].reg.eq(0)
         yield xregs.regs[xregs.OV].reg.eq(0)
@@ -97,13 +97,12 @@ def setup_regs(core, test):
             for i, x in enumerate(SPR):
                 if sprname == x.name:
                     yield sregs[i].reg.eq(val)
-                    print ("setting slow SPR %d (%s) to %x" % \
-                            (i, sprname, val))
+                    print("setting slow SPR %d (%s) to %x" %
+                          (i, sprname, val))
         else:
             yield fregs.regs[fast].reg.eq(val)
-            print ("setting fast reg %d (%s) to %x" % \
-                    (fast, sprname, val))
-
+            print("setting fast reg %d (%s) to %x" %
+                  (fast, sprname, val))
 
     # allow changes to settle before reporting on XER
     yield Settle()
@@ -116,8 +115,8 @@ def setup_regs(core, test):
     oe = yield pdecode2.e.do.oe.oe
     oe_ok = yield pdecode2.e.do.oe.oe_ok
 
-    print ("before: so/ov-32/ca-32", so, bin(ov), bin(ca))
-    print ("oe:", oe, oe_ok)
+    print("before: so/ov-32/ca-32", so, bin(ov), bin(ca))
+    print("oe:", oe, oe_ok)
 
 
 def check_regs(dut, sim, core, test, code):
@@ -126,25 +125,25 @@ def check_regs(dut, sim, core, test, code):
     for i in range(32):
         rval = yield core.regs.int.regs[i].reg
         intregs.append(rval)
-    print ("int regs", list(map(hex, intregs)))
+    print("int regs", list(map(hex, intregs)))
     for i in range(32):
         simregval = sim.gpr[i].asint()
         dut.assertEqual(simregval, intregs[i],
-            "int reg %d not equal %s" % (i, repr(code)))
+                        "int reg %d not equal %s" % (i, repr(code)))
 
     # CRs
     crregs = []
     for i in range(8):
         rval = yield core.regs.cr.regs[i].reg
         crregs.append(rval)
-    print ("cr regs", list(map(hex, crregs)))
+    print("cr regs", list(map(hex, crregs)))
     for i in range(8):
         rval = crregs[i]
         cri = sim.crl[7-i].get_range().value
-        print ("cr reg", i, hex(cri), i, hex(rval))
+        print("cr reg", i, hex(cri), i, hex(rval))
         # XXX https://bugs.libre-soc.org/show_bug.cgi?id=363
         dut.assertEqual(cri, rval,
-            "cr reg %d not equal %s" % (i, repr(code)))
+                        "cr reg %d not equal %s" % (i, repr(code)))
 
     # XER
     xregs = core.regs.xer
@@ -152,17 +151,17 @@ def check_regs(dut, sim, core, test, code):
     ov = yield xregs.regs[xregs.OV].reg
     ca = yield xregs.regs[xregs.CA].reg
 
-    print ("sim SO", sim.spr['XER'][XER_bits['SO']])
+    print("sim SO", sim.spr['XER'][XER_bits['SO']])
     e_so = sim.spr['XER'][XER_bits['SO']].value
     e_ov = sim.spr['XER'][XER_bits['OV']].value
     e_ov32 = sim.spr['XER'][XER_bits['OV32']].value
     e_ca = sim.spr['XER'][XER_bits['CA']].value
     e_ca32 = sim.spr['XER'][XER_bits['CA32']].value
 
-    e_ov = e_ov | (e_ov32<<1)
-    e_ca = e_ca | (e_ca32<<1)
+    e_ov = e_ov | (e_ov32 << 1)
+    e_ca = e_ca | (e_ca32 << 1)
 
-    print ("after: so/ov-32/ca-32", so, bin(ov), bin(ca))
+    print("after: so/ov-32/ca-32", so, bin(ov), bin(ca))
     dut.assertEqual(e_so, so, "so mismatch %s" % (repr(code)))
     dut.assertEqual(e_ov, ov, "ov mismatch %s" % (repr(code)))
     dut.assertEqual(e_ca, ca, "ca mismatch %s" % (repr(code)))
@@ -178,6 +177,7 @@ def wait_for_busy_hi(cu):
         print("!busy", busy_o, terminated_o)
         yield
 
+
 def set_issue(core, dec2, sim):
     yield core.issue_i.eq(1)
     yield
@@ -222,8 +222,8 @@ class TestRunner(FHDLTestCase):
 
         # temporary hack: says "go" immediately for both address gen and ST
         ldst = core.fus.fus['ldst0']
-        m.d.comb += ldst.ad.go.eq(ldst.ad.rel) # link addr-go direct to rel
-        m.d.comb += ldst.st.go.eq(ldst.st.rel) # link store-go direct to rel
+        m.d.comb += ldst.ad.go.eq(ldst.ad.rel)  # link addr-go direct to rel
+        m.d.comb += ldst.st.go.eq(ldst.st.rel)  # link store-go direct to rel
 
         # nmigen Simulation
         sim = Simulator(m)
@@ -258,7 +258,7 @@ class TestRunner(FHDLTestCase):
                     yield instruction.eq(ins)          # raw binary instr.
                     yield ivalid_i.eq(1)
                     yield Settle()
-                    #fn_unit = yield pdecode2.e.fn_unit
+                    # fn_unit = yield pdecode2.e.fn_unit
                     #fuval = self.funit.value
                     #self.assertEqual(fn_unit & fuval, fuval)
 
@@ -270,7 +270,7 @@ class TestRunner(FHDLTestCase):
                     yield ivalid_i.eq(0)
                     yield
 
-                    print ("sim", code)
+                    print("sim", code)
                     # call simulated operation
                     opname = code.split(' ')[0]
                     yield from sim.call(opname)
@@ -284,7 +284,7 @@ class TestRunner(FHDLTestCase):
 
         sim.add_sync_process(process)
         with sim.write_vcd("core_simulator.vcd", "core_simulator.gtkw",
-                            traces=[]):
+                           traces=[]):
             sim.run()
 
 
@@ -300,4 +300,3 @@ if __name__ == "__main__":
 
     runner = unittest.TextTestRunner()
     runner.run(suite)
-
diff --git a/src/soc/simple/test/test_issuer.py b/src/soc/simple/test/test_issuer.py
index 804d7ba7..a353933b 100644
--- a/src/soc/simple/test/test_issuer.py
+++ b/src/soc/simple/test/test_issuer.py
@@ -15,7 +15,7 @@ from soc.decoder.power_enums import Function, XER_bits
 from soc.config.endian import bigendian
 
 from soc.simple.issuer import TestIssuer
-from soc.experiment.compalu_multi import find_ok # hack
+from soc.experiment.compalu_multi import find_ok  # hack
 
 from soc.config.test.test_loadstore import TestMemPspec
 from soc.simple.test.test_core import (setup_regs, check_regs,
@@ -39,32 +39,32 @@ from soc.simulator.test_sim import (GeneralTestCases, AttnTestCase)
 
 def setup_i_memory(imem, startaddr, instructions):
     mem = imem
-    print ("insn before, init mem", mem.depth, mem.width, mem,
-                                    len(instructions))
+    print("insn before, init mem", mem.depth, mem.width, mem,
+          len(instructions))
     for i in range(mem.depth):
         yield mem._array[i].eq(0)
     yield Settle()
-    startaddr //= 4 # instructions are 32-bit
-    mask = ((1<<64)-1)
+    startaddr //= 4  # instructions are 32-bit
+    mask = ((1 << 64)-1)
     for ins in instructions:
         if isinstance(ins, tuple):
             insn, code = ins
         else:
             insn, code = ins, ''
         insn = insn & 0xffffffff
-        msbs = (startaddr>>1) & mask
+        msbs = (startaddr >> 1) & mask
         val = yield mem._array[msbs]
         if insn != 0:
-            print ("before set", hex(4*startaddr),
-                                 hex(msbs), hex(val), hex(insn))
+            print("before set", hex(4*startaddr),
+                  hex(msbs), hex(val), hex(insn))
         lsb = 1 if (startaddr & 1) else 0
         val = (val | (insn << (lsb*32)))
         val = val & mask
         yield mem._array[msbs].eq(val)
         yield Settle()
         if insn != 0:
-            print ("after  set", hex(4*startaddr), hex(msbs), hex(val))
-            print ("instr: %06x 0x%x %s %08x" % (4*startaddr, insn, code, val))
+            print("after  set", hex(4*startaddr), hex(msbs), hex(val))
+            print("instr: %06x 0x%x %s %08x" % (4*startaddr, insn, code, val))
         startaddr += 1
         startaddr = startaddr & mask
 
@@ -112,12 +112,12 @@ class TestRunner(FHDLTestCase):
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
-                print ("regs", test.regs)
-                print ("sprs", test.sprs)
-                print ("cr", test.cr)
-                print ("mem", test.mem)
-                print ("msr", test.msr)
-                print ("assem", program.assembly)
+                print("regs", test.regs)
+                print("sprs", test.sprs)
+                print("cr", test.cr)
+                print("mem", test.mem)
+                print("msr", test.msr)
+                print("assem", program.assembly)
                 gen = list(program.generate_instructions())
                 insncode = program.assembly.splitlines()
                 instructions = list(zip(gen, insncode))
@@ -127,7 +127,7 @@ class TestRunner(FHDLTestCase):
                           disassembly=insncode,
                           bigendian=bigendian)
 
-                pc = 0 # start address
+                pc = 0  # start address
 
                 yield from setup_i_memory(imem, pc, instructions)
                 yield from setup_test_memory(l0, sim)
@@ -146,7 +146,7 @@ class TestRunner(FHDLTestCase):
                     # start the instruction
                     yield go_insn_i.eq(1)
                     yield
-                    yield issuer.pc_i.ok.eq(0) # don't change PC from now on
+                    yield issuer.pc_i.ok.eq(0)  # don't change PC from now on
                     yield go_insn_i.eq(0)      # and don't issue a new insn
                     yield Settle()
 
@@ -155,9 +155,9 @@ class TestRunner(FHDLTestCase):
                     yield from wait_for_busy_clear(core)
 
                     terminated = yield issuer.halted_o
-                    print ("terminated", terminated)
+                    print("terminated", terminated)
 
-                    print ("sim", code)
+                    print("sim", code)
                     # call simulated operation
                     opname = code.split(' ')[0]
                     yield from sim.call(opname)
@@ -176,25 +176,24 @@ class TestRunner(FHDLTestCase):
 
         sim.add_sync_process(process)
         with sim.write_vcd("issuer_simulator.vcd",
-                            traces=[]):
+                           traces=[]):
             sim.run()
 
 
 if __name__ == "__main__":
     unittest.main(exit=False)
     suite = unittest.TestSuite()
-    #suite.addTest(TestRunner(HelloTestCases.test_data))
-    #suite.addTest(TestRunner(DivTestCase.test_data))
+    # suite.addTest(TestRunner(HelloTestCases.test_data))
+    # suite.addTest(TestRunner(DivTestCase.test_data))
     suite.addTest(TestRunner(AttnTestCase.test_data))
     suite.addTest(TestRunner(GeneralTestCases.test_data))
     suite.addTest(TestRunner(LDSTTestCase.test_data))
-    #suite.addTest(TestRunner(CRTestCase.test_data))
-    #suite.addTest(TestRunner(ShiftRotTestCase.test_data))
-    #suite.addTest(TestRunner(LogicalTestCase.test_data))
-    #suite.addTest(TestRunner(ALUTestCase.test_data))
-    #suite.addTest(TestRunner(BranchTestCase.test_data))
-    #suite.addTest(TestRunner(SPRTestCase.test_data))
+    # suite.addTest(TestRunner(CRTestCase.test_data))
+    # suite.addTest(TestRunner(ShiftRotTestCase.test_data))
+    # suite.addTest(TestRunner(LogicalTestCase.test_data))
+    # suite.addTest(TestRunner(ALUTestCase.test_data))
+    # suite.addTest(TestRunner(BranchTestCase.test_data))
+    # suite.addTest(TestRunner(SPRTestCase.test_data))
 
     runner = unittest.TextTestRunner()
     runner.run(suite)
-
diff --git a/src/soc/simple/test/test_microwatt.py b/src/soc/simple/test/test_microwatt.py
index ae13cd43..c666a815 100644
--- a/src/soc/simple/test/test_microwatt.py
+++ b/src/soc/simple/test/test_microwatt.py
@@ -44,7 +44,7 @@ class BinaryTestCase(FHDLTestCase):
     def run_tst_program(self, prog):
         initial_regs = [0] * 32
         tc = TestCase(prog, self.test_name, initial_regs, None, 0,
-                                            None, 0,
+                      None, 0,
                       do_sim=False)
         self.test_data.append(tc)
 
@@ -96,23 +96,23 @@ class TestRunner(FHDLTestCase):
                 print(test.name)
                 program = test.program
                 self.subTest(test.name)
-                print ("regs", test.regs)
-                print ("sprs", test.sprs)
-                print ("cr", test.cr)
-                print ("mem", test.mem)
-                print ("msr", test.msr)
-                print ("assem", program.assembly)
+                print("regs", test.regs)
+                print("sprs", test.sprs)
+                print("cr", test.cr)
+                print("mem", test.mem)
+                print("msr", test.msr)
+                print("assem", program.assembly)
                 instructions = list(program.generate_instructions())
 
-                print ("instructions", len(instructions))
+                print("instructions", len(instructions))
 
-                pc = 0 # start of memory
+                pc = 0  # start of memory
 
                 yield from setup_i_memory(imem, pc, instructions)
                 # blech!  put the same listing into the data memory
                 data_mem = get_l0_mem(l0)
                 yield from setup_i_memory(data_mem, pc, instructions)
-                #yield from setup_test_memory(l0, sim)
+                # yield from setup_test_memory(l0, sim)
                 yield from setup_regs(core, test)
 
                 yield pc_i.eq(pc)
@@ -123,7 +123,7 @@ class TestRunner(FHDLTestCase):
                     # start the instruction
                     yield go_insn_i.eq(1)
                     yield
-                    yield pc_i_ok.eq(0) # don't change PC from now on
+                    yield pc_i_ok.eq(0)  # don't change PC from now on
                     yield go_insn_i.eq(0)      # and don't issue a new insn
                     yield from wait_for_busy_hi(core)
                     yield Settle()
@@ -135,21 +135,21 @@ class TestRunner(FHDLTestCase):
                     yield from wait_for_busy_clear(core)
 
                     terminated = yield core.core_terminated_o
-                    print ("terminated", terminated)
+                    print("terminated", terminated)
 
                     terminated = yield core.core_terminated_o
                     if terminated:
                         break
 
             # register check
-            #yield from check_regs(self, sim, core, test, code)
+            # yield from check_regs(self, sim, core, test, code)
 
             # Memory check
-            #yield from check_sim_memory(self, l0, sim, code)
+            # yield from check_sim_memory(self, l0, sim, code)
 
         sim.add_sync_process(process)
         with sim.write_vcd("binary_issuer_simulator.vcd",
-                            traces=[]):
+                           traces=[]):
             sim.run()
 
 
@@ -160,4 +160,3 @@ if __name__ == "__main__":
 
     runner = unittest.TextTestRunner()
     runner.run(suite)
-
diff --git a/src/soc/simulator/program.py b/src/soc/simulator/program.py
index 16a172bb..fda0c959 100644
--- a/src/soc/simulator/program.py
+++ b/src/soc/simulator/program.py
@@ -26,14 +26,14 @@ class Program:
             self.endian_fmt = "elf64-little"
             self.obj_fmt = "-le"
 
-        if isinstance(instructions, str): # filename
+        if isinstance(instructions, str):  # filename
             self.binfile = open(instructions, "rb")
-            self.assembly = '' # noo disassemble number fiiive
-            print ("program", self.binfile)
+            self.assembly = ''  # noo disassemble number fiiive
+            print("program", self.binfile)
         else:
             if isinstance(instructions, list):
                 instructions = '\n'.join(instructions)
-            self.assembly = instructions + '\n' # plus final newline
+            self.assembly = instructions + '\n'  # plus final newline
             self._assemble()
         self._instructions = list(self._get_instructions())
 
@@ -82,7 +82,7 @@ class Program:
             data = self.binfile.read(4)
             if not data:
                 break
-            yield struct.unpack('<I', data)[0] # unsigned int
+            yield struct.unpack('<I', data)[0]  # unsigned int
 
     def generate_instructions(self):
         yield from self._instructions
diff --git a/src/soc/simulator/qemu.py b/src/soc/simulator/qemu.py
index 41ef1690..c4511e12 100644
--- a/src/soc/simulator/qemu.py
+++ b/src/soc/simulator/qemu.py
@@ -2,14 +2,15 @@ from pygdbmi.gdbcontroller import GdbController
 import subprocess
 
 launch_args_be = ['qemu-system-ppc64',
-               '-machine', 'powernv9',
-               '-nographic',
-               '-s', '-S']
+                  '-machine', 'powernv9',
+                  '-nographic',
+                  '-s', '-S']
 
 launch_args_le = ['qemu-system-ppc64le',
-               '-machine', 'powernv9',
-               '-nographic',
-               '-s', '-S']
+                  '-machine', 'powernv9',
+                  '-nographic',
+                  '-s', '-S']
+
 
 def swap_order(x, nbytes):
     x = x.to_bytes(nbytes, byteorder='little')
@@ -56,16 +57,17 @@ class QemuController:
         return self.gdb.write('-break-delete' + breakstring)
 
     def set_byte(self, addr, v):
-        print ("qemu set byte", hex(addr), hex(v))
+        print("qemu set byte", hex(addr), hex(v))
         faddr = '&{int}0x%x' % addr
         res = self.gdb.write('-data-write-memory-bytes %s "%02x"' % (faddr, v))
-        print ("confirm", self.get_mem(addr, 1))
+        print("confirm", self.get_mem(addr, 1))
 
     def get_mem(self, addr, nbytes):
-        res = self.gdb.write("-data-read-memory %d u 1 1 %d" % (addr, 8*nbytes))
+        res = self.gdb.write("-data-read-memory %d u 1 1 %d" %
+                             (addr, 8*nbytes))
         #print ("get_mem", res)
         for x in res:
-            if(x["type"]=="result"):
+            if(x["type"] == "result"):
                 l = list(map(int, x['payload']['memory'][0]['data']))
                 res = []
                 for j in range(0, len(l), 8):
@@ -81,13 +83,13 @@ class QemuController:
 
     def _get_register(self, fmt):
         res = self.gdb.write('-data-list-register-values '+fmt,
-                             timeout_sec=1.0) # increase this timeout if needed
+                             timeout_sec=1.0)  # increase this timeout if needed
         for x in res:
-            if(x["type"]=="result"):
+            if(x["type"] == "result"):
                 assert 'register-values' in x['payload']
                 res = int(x['payload']['register-values'][0]['value'], 0)
                 return res
-                #return swap_order(res, 8)
+                # return swap_order(res, 8)
         return None
 
     # TODO: use -data-list-register-names instead of hardcoding the values
@@ -95,10 +97,11 @@ class QemuController:
     def get_msr(self): return self._get_register('x 65')
     def get_cr(self): return self._get_register('x 66')
     def get_lr(self): return self._get_register('x 67')
-    def get_ctr(self): return self._get_register('x 68') # probably
+    def get_ctr(self): return self._get_register('x 68')  # probably
     def get_xer(self): return self._get_register('x 69')
     def get_fpscr(self): return self._get_register('x 70')
     def get_mq(self): return self._get_register('x 71')
+
     def get_register(self, num):
         return self._get_register('x {}'.format(num))
 
@@ -120,30 +123,30 @@ class QemuController:
 
 
 def run_program(program, initial_mem=None, extra_break_addr=None,
-                         bigendian=False):
+                bigendian=False):
     q = QemuController(program.binfile.name, bigendian)
     q.connect()
-    q.set_endian(True) # easier to set variables this way
+    q.set_endian(True)  # easier to set variables this way
 
     # Run to the start of the program
     if initial_mem:
         for addr, (v, wid) in initial_mem.items():
             for i in range(wid):
-                q.set_byte(addr+i, (v>>i*8) & 0xff)
+                q.set_byte(addr+i, (v >> i*8) & 0xff)
 
     # set breakpoint at start
     q.break_address(0x20000000)
     q.gdb_continue()
     # set the MSR bit 63, to set bigendian/littleendian mode
     msr = q.get_msr()
-    print ("msr", bigendian, hex(msr))
+    print("msr", bigendian, hex(msr))
     if bigendian:
-        msr &= ~(1<<0)
-        msr = msr & ((1<<64)-1)
+        msr &= ~(1 << 0)
+        msr = msr & ((1 << 64)-1)
     else:
-        msr |= (1<<0)
+        msr |= (1 << 0)
     q.gdb_eval('$msr=%d' % msr)
-    print ("msr set to", hex(msr))
+    print("msr set to", hex(msr))
     # set the CR to 0, matching the simulator
     q.gdb_eval('$cr=0')
     # delete the previous breakpoint so loops don't screw things up
diff --git a/src/soc/simulator/test_sim.py b/src/soc/simulator/test_sim.py
index d6343ae5..30045ac6 100644
--- a/src/soc/simulator/test_sim.py
+++ b/src/soc/simulator/test_sim.py
@@ -35,10 +35,10 @@ class AttnTestCase(FHDLTestCase):
             self.run_tst_program(program, [1])
 
     def run_tst_program(self, prog, initial_regs=None, initial_sprs=None,
-                                    initial_mem=None):
+                        initial_mem=None):
         initial_regs = [0] * 32
         tc = TestCase(prog, self.test_name, initial_regs, initial_sprs, 0,
-                                            initial_mem, 0)
+                      initial_mem, 0)
         self.test_data.append(tc)
 
 
@@ -74,10 +74,10 @@ class GeneralTestCases(FHDLTestCase):
                "addi 2, 0, 0x1234",
                "stw  1, 0(2)",
                "lwz  3, 0(2)"
-              ]
+               ]
         initial_mem = {0x1230: (0x5432123412345678, 8),
                        0x1238: (0xabcdef0187654321, 8),
-                      }
+                       }
         with Program(lst, bigendian) as program:
             self.run_tst_program(program,
                                  [1, 2, 3],
@@ -173,7 +173,7 @@ class GeneralTestCases(FHDLTestCase):
                "addi 3, 0, 0x00ee",
                "stb 3, 1(2)",
                "lbz 4, 1(2)",
-        ]
+               ]
         initial_regs = [0] * 32
         initial_regs[1] = 0x1004
         initial_regs[2] = 0x1008
@@ -181,9 +181,9 @@ class GeneralTestCases(FHDLTestCase):
         initial_mem = {0x1000: (0x5432123412345678, 8),
                        0x1008: (0xabcdef0187654321, 8),
                        0x1020: (0x1828384822324252, 8),
-                        }
+                       }
         with Program(lst, bigendian) as program:
-            self.run_tst_program(program, [3,4], initial_mem)
+            self.run_tst_program(program, [3, 4], initial_mem)
 
     @unittest.skip("disable")
     def test_3_load_store(self):
@@ -199,9 +199,9 @@ class GeneralTestCases(FHDLTestCase):
         initial_mem = {0x1000: (0x5432123412345678, 8),
                        0x1008: (0xabcdef0187654321, 8),
                        0x1020: (0x1828384822324252, 8),
-                        }
+                       }
         with Program(lst, bigendian) as program:
-            self.run_tst_program(program, [1,2,3,4], initial_mem)
+            self.run_tst_program(program, [1, 2, 3, 4], initial_mem)
 
     def test_loop(self):
         """in godbolt.org:
@@ -213,7 +213,7 @@ class GeneralTestCases(FHDLTestCase):
             } while (i != 0);
         }
         """
-        lst = ["addi 9, 0, 0x10", # i = 16
+        lst = ["addi 9, 0, 0x10",  # i = 16
                "addi 9,9,-1",    # i = i - 1
                "cmpi 0,1,9,12",     # compare 9 to value 0, store in CR2
                "bc 4,0,-8"         # branch if CR2 "test was != 0"
@@ -222,17 +222,17 @@ class GeneralTestCases(FHDLTestCase):
             self.run_tst_program(program, [9], initial_mem={})
 
     def test_30_addis(self):
-        lst = [#"addi 0, 0, 5",
-               "addis 12, 0, 0",
-               ]
+        lst = [  # "addi 0, 0, 5",
+            "addis 12, 0, 0",
+        ]
         with Program(lst, bigendian) as program:
             self.run_tst_program(program, [12])
 
     def run_tst_program(self, prog, initial_regs=None, initial_sprs=None,
-                                    initial_mem=None):
+                        initial_mem=None):
         initial_regs = [0] * 32
         tc = TestCase(prog, self.test_name, initial_regs, initial_sprs, 0,
-                                            initial_mem, 0)
+                      initial_mem, 0)
         self.test_data.append(tc)
 
 
@@ -261,19 +261,18 @@ class DecoderBase:
         sim = Simulator(m)
 
         def process():
-            #yield pdecode2.dec.bigendian.eq(bigendian)
+            # yield pdecode2.dec.bigendian.eq(bigendian)
             yield Settle()
 
             while True:
                 try:
                     yield from simulator.setup_one()
-                except KeyError: # indicates instruction not in imem: stop
+                except KeyError:  # indicates instruction not in imem: stop
                     break
                 yield Settle()
                 yield from simulator.execute_one()
                 yield Settle()
 
-
         sim.add_process(process)
         with sim.write_vcd("simulator.vcd", "simulator.gtkw",
                            traces=[]):
@@ -282,7 +281,7 @@ class DecoderBase:
         return simulator
 
     def run_tst_program(self, prog, reglist, initial_mem=None,
-                                             extra_break_addr=None):
+                        extra_break_addr=None):
         import sys
         simulator = self.run_tst(prog, initial_mem=initial_mem,
                                  initial_pc=0x20000000)
@@ -294,18 +293,18 @@ class DecoderBase:
         print(simulator.gpr.dump())
 
     def qemu_mem_compare(self, sim, qemu, check=True):
-        if False: # disable convenient large interesting debugging memory dump
+        if False:  # disable convenient large interesting debugging memory dump
             addr = 0x0
             qmemdump = qemu.get_mem(addr, 2048)
             for i in range(len(qmemdump)):
                 s = hex(int(qmemdump[i]))
-                print ("qemu mem %06x %s" % (addr+i*8, s))
+                print("qemu mem %06x %s" % (addr+i*8, s))
         for k, v in sim.mem.mem.items():
             qmemdump = qemu.get_mem(k*8, 8)
             s = hex(int(qmemdump[0]))[2:]
-            print ("qemu mem %06x %16s" % (k*8, s))
+            print("qemu mem %06x %16s" % (k*8, s))
         for k, v in sim.mem.mem.items():
-            print ("sim mem  %06x %016x" % (k*8, v))
+            print("sim mem  %06x %016x" % (k*8, v))
         if not check:
             return
         for k, v in sim.mem.mem.items():
diff --git a/src/unused/TLB/ariane/plru.py b/src/unused/TLB/ariane/plru.py
index a8db5c27..25d208ae 100644
--- a/src/unused/TLB/ariane/plru.py
+++ b/src/unused/TLB/ariane/plru.py
@@ -16,6 +16,7 @@ class PLRU(Elaboratable):
               / \ /\/\  /\
              ... ... ... ...
     """
+
     def __init__(self, entries):
         self.entries = entries
         self.lu_hit = Signal(entries)
@@ -52,13 +53,13 @@ class PLRU(Elaboratable):
             with m.If(hit):
                 # Set the nodes to the values we would expect
                 for lvl in range(LOG_TLB):
-                    idx_base = (1<<lvl)-1
+                    idx_base = (1 << lvl)-1
                     # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
-                    shift = LOG_TLB - lvl;
+                    shift = LOG_TLB - lvl
                     new_idx = Const(~((i >> (shift-1)) & 1), (1, False))
                     plru_idx = idx_base + (i >> shift)
-                    print ("plru", i, lvl, hex(idx_base),
-                                  plru_idx, shift, new_idx)
+                    print("plru", i, lvl, hex(idx_base),
+                          plru_idx, shift, new_idx)
                     m.d.comb += self.plru_tree_o[plru_idx].eq(new_idx)
 
         # Decode tree to write enable signals
@@ -79,20 +80,20 @@ class PLRU(Elaboratable):
         for i in range(self.entries):
             en = []
             for lvl in range(LOG_TLB):
-                idx_base = (1<<lvl)-1
+                idx_base = (1 << lvl)-1
                 # lvl0 <=> MSB, lvl1 <=> MSB-1, ...
-                shift = LOG_TLB - lvl;
-                new_idx = (i >> (shift-1)) & 1;
-                plru_idx = idx_base + (i>>shift)
+                shift = LOG_TLB - lvl
+                new_idx = (i >> (shift-1)) & 1
+                plru_idx = idx_base + (i >> shift)
                 plru = Signal(reset_less=True,
                               name="plru-%d-%d-%d" % (i, lvl, plru_idx))
                 m.d.comb += plru.eq(self.plru_tree[plru_idx])
                 # en &= plru_tree_q[idx_base + (i>>shift)] == new_idx;
                 if new_idx:
-                    en.append(~plru) # yes inverted (using bool())
+                    en.append(~plru)  # yes inverted (using bool())
                 else:
                     en.append(plru)  # yes inverted (using bool())
-            print ("plru", i, en)
+            print("plru", i, en)
             # boolean logic manipulation:
             # plru0 & plru1 & plru2 == ~(~plru0 | ~plru1 | ~plru2)
             replace.append(~Cat(*en).bool())