From 157669066b9990ca430f49293bcd97f9ae51890d Mon Sep 17 00:00:00 2001
From: Luke Kenneth Casson Leighton
Date: Tue, 24 Aug 2021 11:22:14 +0100
Subject: [PATCH] big rename, global/search/replace of ready_o with o_ready
 and the other 4 signals as well, valid_i -> i_valid

https://libera.irclog.whitequark.org/nmigen/2021-08-24#30728292;
to be consistent with nmigen standards
---
 src/soc/config/test/test_fetch.py             | 8 +-
 src/soc/config/test/test_loadstore.py         | 16 +-
 src/soc/experiment/alu_fsm.py                 | 58 +++----
 src/soc/experiment/alu_hier.py                | 122 +++++++--------
 src/soc/experiment/compalu.py                 | 10 +-
 src/soc/experiment/compalu_multi.py           | 16 +-
 src/soc/experiment/dcache.py                  | 16 +-
 src/soc/experiment/formal/proof_alu_fsm.py    | 22 +--
 src/soc/experiment/imem.py                    | 4 +-
 src/soc/experiment/lsmem.py                   | 6 +-
 src/soc/experiment/pi2ls.py                   | 8 +-
 src/soc/experiment/score6600.py               | 14 +-
 src/soc/experiment/score6600_multi.py         | 14 +-
 src/soc/experiment/test/test_compalu_multi.py | 12 +-
 src/soc/fu/compunits/formal/proof_fu.py       | 2 +-
 src/soc/fu/div/fsm.py                         | 8 +-
 src/soc/fu/mmu/fsm.py                         | 8 +-
 src/soc/minerva/units/fetch.py                | 20 +--
 src/soc/minerva/units/loadstore.py            | 24 +--
 src/soc/scoreboard/addr_split.py              | 74 ++++-----
 src/soc/scoreboard/fn_unit.py                 | 2 +-
 src/soc/scoreboard/instruction_q.py           | 10 +-
 src/soc/scoreboard/test_iq.py                 | 4 +-
 src/soc/simple/core.py                        | 4 +-
 src/soc/simple/issuer.py                      | 146 +++++++++---------
 src/soc/simple/test/test_core.py              | 8 +-
 src/soc/simple/test/test_runner.py            | 12 +-
 27 files changed, 324 insertions(+), 324 deletions(-)

diff --git a/src/soc/config/test/test_fetch.py b/src/soc/config/test/test_fetch.py
index df9caf68..5c4097a5 100644
--- a/src/soc/config/test/test_fetch.py
+++ b/src/soc/config/test/test_fetch.py
@@ -15,8 +15,8 @@ sys.setrecursionlimit(10**6)
 def read_from_addr(dut, addr):
     yield dut.a_pc_i.eq(addr)
-    yield dut.a_valid_i.eq(1)
-    yield dut.f_valid_i.eq(1)
+    yield dut.a_i_valid.eq(1)
+    yield dut.f_i_valid.eq(1)
     yield dut.a_stall_i.eq(1)
     yield
     yield dut.a_stall_i.eq(0)
@@ -26,8 +26,8 @@ def read_from_addr(dut, addr):
     yield
     res = (yield dut.f_instr_o)
-    yield dut.a_valid_i.eq(0)
-    yield dut.f_valid_i.eq(0)
+    yield dut.a_i_valid.eq(0)
+    yield dut.f_i_valid.eq(0)
     yield
     return res
diff --git a/src/soc/config/test/test_loadstore.py b/src/soc/config/test/test_loadstore.py
index 02c491f9..91435bd7 100644
--- a/src/soc/config/test/test_loadstore.py
+++ b/src/soc/config/test/test_loadstore.py
@@ -16,9 +16,9 @@ def write_to_addr(dut, addr, value):
     yield dut.x_st_data_i.eq(value)
     yield dut.x_st_i.eq(1)
     yield dut.x_mask_i.eq(-1)
-    yield dut.x_valid_i.eq(1)
+    yield dut.x_i_valid.eq(1)
     yield dut.x_stall_i.eq(1)
-    yield dut.m_valid_i.eq(1)
+    yield dut.m_i_valid.eq(1)
     yield
     yield
@@ -33,7 +33,7 @@ def write_to_addr(dut, addr, value):
 def read_from_addr(dut, addr):
     yield dut.x_addr_i.eq(addr)
     yield dut.x_ld_i.eq(1)
-    yield dut.x_valid_i.eq(1)
+    yield dut.x_i_valid.eq(1)
     yield dut.x_stall_i.eq(1)
     yield
     yield dut.x_stall_i.eq(0)
@@ -42,7 +42,7 @@ def read_from_addr(dut, addr):
     yield Settle()
     while (yield dut.x_busy_o):
         yield
-    assert (yield dut.x_valid_i)
+    assert (yield dut.x_i_valid)
     return (yield dut.m_ld_data_o)
@@ -53,8 +53,8 @@ def write_byte(dut, addr, val):
     yield dut.x_st_i.eq(1)
     yield dut.x_mask_i.eq(1 << offset)
     print("write_byte", addr, bin(1 << offset), hex(val << (offset*8)))
-    yield dut.x_valid_i.eq(1)
-    yield dut.m_valid_i.eq(1)
+    yield dut.x_i_valid.eq(1)
+    yield dut.m_i_valid.eq(1)
     yield
     yield dut.x_st_i.eq(0)
@@ -66,13 +66,13 @@ def read_byte(dut, addr):
     offset = addr &
0x3 yield dut.x_addr_i.eq(addr) yield dut.x_ld_i.eq(1) - yield dut.x_valid_i.eq(1) + yield dut.x_i_valid.eq(1) yield yield dut.x_ld_i.eq(0) yield Settle() while (yield dut.x_busy_o): yield - assert (yield dut.x_valid_i) + assert (yield dut.x_i_valid) val = (yield dut.m_ld_data_o) print("read_byte", addr, offset, hex(val)) return (val >> (offset * 8)) & 0xff diff --git a/src/soc/experiment/alu_fsm.py b/src/soc/experiment/alu_fsm.py index 3fb1c6cf..3b0418a3 100644 --- a/src/soc/experiment/alu_fsm.py +++ b/src/soc/experiment/alu_fsm.py @@ -7,11 +7,11 @@ intended to comply with both the CompALU API and the nmutil Pipeline API The basic rules are: -1) p.ready_o is asserted on the initial ("Idle") state, otherwise it keeps low. -2) n.valid_o is asserted on the final ("Done") state, otherwise it keeps low. -3) The FSM stays in the Idle state while p.valid_i is low, otherwise +1) p.o_ready is asserted on the initial ("Idle") state, otherwise it keeps low. +2) n.o_valid is asserted on the final ("Done") state, otherwise it keeps low. +3) The FSM stays in the Idle state while p.i_valid is low, otherwise it accepts the input data and moves on. -4) The FSM stays in the Done state while n.ready_i is low, otherwise +4) The FSM stays in the Done state while n.i_ready is low, otherwise it releases the output data and goes back to the Idle state. """ @@ -152,15 +152,15 @@ class Shifter(Elaboratable): with m.FSM(): with m.State("IDLE"): m.d.comb += [ - # keep p.ready_o active on IDLE - self.p.ready_o.eq(1), + # keep p.o_ready active on IDLE + self.p.o_ready.eq(1), # keep loading the shift register and shift count load.eq(1), next_count.eq(self.p.data_i.shift), ] # capture the direction bit as well m.d.sync += direction.eq(self.op.sdir) - with m.If(self.p.valid_i): + with m.If(self.p.i_valid): # Leave IDLE when data arrives with m.If(next_count == 0): # short-circuit for zero shift @@ -178,9 +178,9 @@ class Shifter(Elaboratable): # exit when shift counter goes to zero m.next = "DONE" with m.State("DONE"): - # keep n.valid_o active while the data is not accepted - m.d.comb += self.n.valid_o.eq(1) - with m.If(self.n.ready_i): + # keep n.o_valid active while the data is not accepted + m.d.comb += self.n.o_valid.eq(1) + with m.If(self.n.i_ready): # go back to IDLE when the data is accepted m.next = "IDLE" @@ -190,10 +190,10 @@ class Shifter(Elaboratable): yield self.op.sdir yield self.p.data_i.data yield self.p.data_i.shift - yield self.p.valid_i - yield self.p.ready_o - yield self.n.ready_i - yield self.n.valid_o + yield self.p.i_valid + yield self.p.o_ready + yield self.n.i_ready + yield self.n.o_valid yield self.n.data_o.data def ports(self): @@ -225,8 +225,8 @@ def test_shifter(): ('p_data_i[7:0]', 'in'), ('p_shift_i[7:0]', 'in'), ({'submodule': 'p'}, [ - ('p_valid_i', 'in'), - ('p_ready_o', 'out')])]), + ('p_i_valid', 'in'), + ('p_o_ready', 'out')])]), ('internal', [ 'fsm_state' if is_engine_pysim() else 'fsm_state[1:0]', 'count[3:0]', @@ -234,8 +234,8 @@ def test_shifter(): ('next port', [ ('n_data_o[7:0]', 'out'), ({'submodule': 'n'}, [ - ('n_valid_o', 'out'), - ('n_ready_i', 'in')])])] + ('n_o_valid', 'out'), + ('n_i_ready', 'in')])])] write_gtkw("test_shifter.gtkw", "test_shifter.vcd", gtkwave_desc, gtkwave_style, @@ -245,32 +245,32 @@ def test_shifter(): sim.add_clock(1e-6) def send(data, shift, direction): - # present input data and assert valid_i + # present input data and assert i_valid yield dut.p.data_i.data.eq(data) yield dut.p.data_i.shift.eq(shift) yield dut.op.sdir.eq(direction) - yield 
dut.p.valid_i.eq(1) + yield dut.p.i_valid.eq(1) yield - # wait for p.ready_o to be asserted - while not (yield dut.p.ready_o): + # wait for p.o_ready to be asserted + while not (yield dut.p.o_ready): yield - # clear input data and negate p.valid_i - yield dut.p.valid_i.eq(0) + # clear input data and negate p.i_valid + yield dut.p.i_valid.eq(0) yield dut.p.data_i.data.eq(0) yield dut.p.data_i.shift.eq(0) yield dut.op.sdir.eq(0) def receive(expected): # signal readiness to receive data - yield dut.n.ready_i.eq(1) + yield dut.n.i_ready.eq(1) yield - # wait for n.valid_o to be asserted - while not (yield dut.n.valid_o): + # wait for n.o_valid to be asserted + while not (yield dut.n.o_valid): yield # read result result = yield dut.n.data_o.data - # negate n.ready_i - yield dut.n.ready_i.eq(0) + # negate n.i_ready + yield dut.n.i_ready.eq(0) # check result assert result == expected diff --git a/src/soc/experiment/alu_hier.py b/src/soc/experiment/alu_hier.py index dbe8465f..3f92dc75 100644 --- a/src/soc/experiment/alu_hier.py +++ b/src/soc/experiment/alu_hier.py @@ -109,10 +109,10 @@ class DummyALU(Elaboratable): self.p.data_i.ctx = Dummy() self.n = Dummy() # make look like nmutil pipeline API self.n.data_o = Dummy() - self.p.valid_i = Signal() - self.p.ready_o = Signal() - self.n.ready_i = Signal() - self.n.valid_o = Signal() + self.p.i_valid = Signal() + self.p.o_ready = Signal() + self.n.i_ready = Signal() + self.n.o_valid = Signal() self.counter = Signal(4) self.op = CompCROpSubset() i = [] @@ -136,11 +136,11 @@ class DummyALU(Elaboratable): go_now = Signal(reset_less=True) # testing no-delay ALU - with m.If(self.p.valid_i): + with m.If(self.p.i_valid): # input is valid. next check, if we already said "ready" or not - with m.If(~self.p.ready_o): + with m.If(~self.p.o_ready): # we didn't say "ready" yet, so say so and initialise - m.d.sync += self.p.ready_o.eq(1) + m.d.sync += self.p.o_ready.eq(1) m.d.sync += self.o.eq(self.a) m.d.comb += go_now.eq(1) @@ -149,14 +149,14 @@ class DummyALU(Elaboratable): with m.Else(): # input says no longer valid, so drop ready as well. # a "proper" ALU would have had to sync in the opcode and a/b ops - m.d.sync += self.p.ready_o.eq(0) + m.d.sync += self.p.o_ready.eq(0) # ok so the counter's running: when it gets to 1, fire the output with m.If((self.counter == 1) | go_now): # set the output as valid if the recipient is ready for it - m.d.sync += self.n.valid_o.eq(1) - with m.If(self.n.ready_i & self.n.valid_o): - m.d.sync += self.n.valid_o.eq(0) + m.d.sync += self.n.o_valid.eq(1) + with m.If(self.n.i_ready & self.n.o_valid): + m.d.sync += self.n.o_valid.eq(0) # recipient said it was ready: reset back to known-good. 
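# (these are sync-domain assignments, so o_valid is dropped on the
# cycle after the i_ready/o_valid overlap is seen, giving the
# recipient exactly one cycle in which the transfer is visible)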
m.d.sync += self.counter.eq(0) # reset the counter m.d.sync += self.o.eq(0) # clear the output for tidiness sake @@ -185,10 +185,10 @@ class ALU(Elaboratable): self.p.data_i.ctx = Dummy() self.n = Dummy() # make look like nmutil pipeline API self.n.data_o = Dummy() - self.p.valid_i = Signal() - self.p.ready_o = Signal() - self.n.ready_i = Signal() - self.n.valid_o = Signal() + self.p.i_valid = Signal() + self.p.o_ready = Signal() + self.n.i_ready = Signal() + self.n.o_valid = Signal() self.counter = Signal(4) self.op = CompALUOpSubset(name="op") i = [] @@ -254,16 +254,16 @@ class ALU(Elaboratable): with m.If(go_now): # with a combinatorial, no-delay ALU, just pass through # the handshake signals to the other side - m.d.comb += self.p.ready_o.eq(self.n.ready_i) - m.d.comb += self.n.valid_o.eq(self.p.valid_i) + m.d.comb += self.p.o_ready.eq(self.n.i_ready) + m.d.comb += self.n.o_valid.eq(self.p.i_valid) with m.Else(): # sequential ALU handshake: - # ready_o responds to valid_i, but only if the ALU is idle - m.d.comb += self.p.ready_o.eq(alu_idle) - # select the internally generated valid_o, above - m.d.comb += self.n.valid_o.eq(alu_done) + # o_ready responds to i_valid, but only if the ALU is idle + m.d.comb += self.p.o_ready.eq(alu_idle) + # select the internally generated o_valid, above + m.d.comb += self.n.o_valid.eq(alu_done) - # hold the ALU result until ready_o is asserted + # hold the ALU result until o_ready is asserted alu_r = Signal(self.width) # output masks @@ -275,7 +275,7 @@ class ALU(Elaboratable): m.d.comb += self.cr.ok.eq(self.op.rc.rc) with m.If(alu_idle): - with m.If(self.p.valid_i): + with m.If(self.p.i_valid): # as this is a "fake" pipeline, just grab the output right now with m.If(self.op.insn_type == MicrOp.OP_ADD): @@ -311,7 +311,7 @@ class ALU(Elaboratable): with m.Else(): m.d.comb += go_now.eq(1) - with m.Elif(~alu_done | self.n.ready_i): + with m.Elif(~alu_done | self.n.i_ready): # decrement the counter while the ALU is neither idle nor finished m.d.sync += self.counter.eq(self.counter - 1) @@ -337,10 +337,10 @@ class ALU(Elaboratable): yield self.a yield self.b yield from self.o.ports() - yield self.p.valid_i - yield self.p.ready_o - yield self.n.valid_o - yield self.n.ready_i + yield self.p.i_valid + yield self.p.o_ready + yield self.n.o_valid + yield self.n.i_ready def ports(self): return list(self) @@ -366,10 +366,10 @@ class BranchALU(Elaboratable): self.p.data_i.ctx = Dummy() self.n = Dummy() # make look like nmutil pipeline API self.n.data_o = Dummy() - self.p.valid_i = Signal() - self.p.ready_o = Signal() - self.n.ready_i = Signal() - self.n.valid_o = Signal() + self.p.i_valid = Signal() + self.p.o_ready = Signal() + self.n.i_ready = Signal() + self.n.o_valid = Signal() self.counter = Signal(4) self.op = Signal(2) i = [] @@ -399,11 +399,11 @@ class BranchALU(Elaboratable): ] go_now = Signal(reset_less=True) # testing no-delay ALU - with m.If(self.p.valid_i): + with m.If(self.p.i_valid): # input is valid. next check, if we already said "ready" or not - with m.If(~self.p.ready_o): + with m.If(~self.p.o_ready): # we didn't say "ready" yet, so say so and initialise - m.d.sync += self.p.ready_o.eq(1) + m.d.sync += self.p.o_ready.eq(1) # as this is a "fake" pipeline, just grab the output right now with m.Switch(self.op): @@ -416,14 +416,14 @@ class BranchALU(Elaboratable): with m.Else(): # input says no longer valid, so drop ready as well. 
# a "proper" ALU would have had to sync in the opcode and a/b ops - m.d.sync += self.p.ready_o.eq(0) + m.d.sync += self.p.o_ready.eq(0) # ok so the counter's running: when it gets to 1, fire the output with m.If((self.counter == 1) | go_now): # set the output as valid if the recipient is ready for it - m.d.sync += self.n.valid_o.eq(1) - with m.If(self.n.ready_i & self.n.valid_o): - m.d.sync += self.n.valid_o.eq(0) + m.d.sync += self.n.o_valid.eq(1) + with m.If(self.n.i_ready & self.n.o_valid): + m.d.sync += self.n.o_valid.eq(0) # recipient said it was ready: reset back to known-good. m.d.sync += self.counter.eq(0) # reset the counter m.d.sync += self.o.eq(0) # clear the output for tidiness sake @@ -449,28 +449,28 @@ def run_op(dut, a, b, op, inv_a=0): yield dut.b.eq(b) yield dut.op.insn_type.eq(op) yield dut.op.invert_in.eq(inv_a) - yield dut.n.ready_i.eq(0) - yield dut.p.valid_i.eq(1) - yield dut.n.ready_i.eq(1) + yield dut.n.i_ready.eq(0) + yield dut.p.i_valid.eq(1) + yield dut.n.i_ready.eq(1) yield # wait for the ALU to accept our input data - while not (yield dut.p.ready_o): + while not (yield dut.p.o_ready): yield - yield dut.p.valid_i.eq(0) + yield dut.p.i_valid.eq(0) yield dut.a.eq(0) yield dut.b.eq(0) yield dut.op.insn_type.eq(0) yield dut.op.invert_in.eq(0) # wait for the ALU to present the output data - while not (yield dut.n.valid_o): + while not (yield dut.n.o_valid): yield # latch the result and lower read_i result = yield dut.o.data - yield dut.n.ready_i.eq(0) + yield dut.n.i_ready.eq(0) return result @@ -520,21 +520,21 @@ def test_alu_parallel(): sim.add_clock(1e-6) def send(a, b, op, inv_a=0, rc=0): - # present input data and assert valid_i + # present input data and assert i_valid yield dut.a.eq(a) yield dut.b.eq(b) yield dut.op.insn_type.eq(op) yield dut.op.invert_in.eq(inv_a) yield dut.op.rc.rc.eq(rc) - yield dut.p.valid_i.eq(1) + yield dut.p.i_valid.eq(1) yield - # wait for ready_o to be asserted - while not (yield dut.p.ready_o): + # wait for o_ready to be asserted + while not (yield dut.p.o_ready): yield - # clear input data and negate valid_i + # clear input data and negate i_valid # if send is called again immediately afterwards, there will be no # visible transition (they will not be negated, after all) - yield dut.p.valid_i.eq(0) + yield dut.p.i_valid.eq(0) yield dut.a.eq(0) yield dut.b.eq(0) yield dut.op.insn_type.eq(0) @@ -543,18 +543,18 @@ def test_alu_parallel(): def receive(): # signal readiness to receive data - yield dut.n.ready_i.eq(1) + yield dut.n.i_ready.eq(1) yield - # wait for valid_o to be asserted - while not (yield dut.n.valid_o): + # wait for o_valid to be asserted + while not (yield dut.n.o_valid): yield # read results result = yield dut.o.data cr = yield dut.cr.data - # negate ready_i + # negate i_ready # if receive is called again immediately afterwards, there will be no # visible transition (it will not be negated, after all) - yield dut.n.ready_i.eq(0) + yield dut.n.i_ready.eq(0) return result, cr def producer(): @@ -650,10 +650,10 @@ def write_alu_gtkw(gtkw_name, clk_period=1e-6, sub_module=None, 'i2[15:0]', 'op__insn_type' if pysim else 'op__insn_type[6:0]', 'op__invert_in', - 'valid_i', - 'ready_o', - 'valid_o', - 'ready_i', + 'i_valid', + 'o_ready', + 'o_valid', + 'i_ready', 'alu_o[15:0]', 'alu_o_ok', 'alu_cr[15:0]', diff --git a/src/soc/experiment/compalu.py b/src/soc/experiment/compalu.py index 05539cd4..4d15ff41 100644 --- a/src/soc/experiment/compalu.py +++ b/src/soc/experiment/compalu.py @@ -63,7 +63,7 @@ class 
ComputationUnitNoDelay(Elaboratable): self.busy_o = Signal(reset_less=True) # fn busy out self.data_o = Signal(rwid, reset_less=True) # Dest out self.rd_rel_o = Signal(reset_less=True) # release src1/src2 request - # release request out (valid_o) + # release request out (o_valid) self.req_rel_o = Signal(reset_less=True) self.done_o = self.req_rel_o # 'normalise' API @@ -133,17 +133,17 @@ class ComputationUnitNoDelay(Elaboratable): # NOTE: this spells TROUBLE if the ALU isn't ready! # go_read is only valid for one clock! with m.If(self.go_rd_i): # src operands ready, GO! - with m.If(~self.alu.p_ready_o): # no ACK yet - m.d.comb += self.alu.p_valid_i.eq(1) # so indicate valid + with m.If(~self.alu.p_o_ready): # no ACK yet + m.d.comb += self.alu.p_i_valid.eq(1) # so indicate valid # only proceed if ALU says its output is valid - with m.If(self.alu.n_valid_o): + with m.If(self.alu.n_o_valid): # when ALU ready, write req release out. waits for shadow m.d.comb += self.req_rel_o.eq(req_l.q & busy_o & self.shadown_i) # when output latch is ready, and ALU says ready, accept ALU output with m.If(self.req_rel_o & self.go_wr_i): # tells ALU "thanks got it" - m.d.comb += self.alu.n_ready_i.eq(1) + m.d.comb += self.alu.n_i_ready.eq(1) # output the data from the latch on go_write with m.If(self.go_wr_i): diff --git a/src/soc/experiment/compalu_multi.py b/src/soc/experiment/compalu_multi.py index d7e32f28..4d17bad8 100644 --- a/src/soc/experiment/compalu_multi.py +++ b/src/soc/experiment/compalu_multi.py @@ -196,7 +196,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable): alu_done = Signal(reset_less=True) alu_pulse = Signal(reset_less=True) alu_pulsem = Signal(self.n_dst, reset_less=True) - m.d.comb += alu_done.eq(self.alu.n.valid_o) + m.d.comb += alu_done.eq(self.alu.n.o_valid) m.d.comb += alu_pulse.eq(rising_edge(m, alu_done)) m.d.comb += alu_pulsem.eq(Repl(alu_pulse, self.n_dst)) @@ -213,13 +213,13 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable): m.d.comb += self.done_o.eq(self.busy_o & ~((self.wr.rel_o & ~self.wrmask).bool())) m.d.comb += wr_any.eq(self.wr.go_i.bool() | prev_wr_go.bool()) - m.d.comb += req_done.eq(wr_any & ~self.alu.n.ready_i & + m.d.comb += req_done.eq(wr_any & ~self.alu.n.i_ready & ((req_l.q & self.wrmask) == 0)) # argh, complicated hack: if there are no regs to write, # instead of waiting for regs that are never going to happen, # we indicate "done" when the ALU is "done" with m.If((self.wrmask == 0) & - self.alu.n.ready_i & self.alu.n.valid_o & self.busy_o): + self.alu.n.i_ready & self.alu.n.o_valid & self.busy_o): m.d.comb += req_done.eq(1) # shadow/go_die @@ -234,7 +234,7 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable): # read-done,wr-proceed latch m.d.sync += rok_l.s.eq(self.issue_i) # set up when issue starts - m.d.sync += rok_l.r.eq(self.alu.n.valid_o & self.busy_o) # ALU done + m.d.sync += rok_l.r.eq(self.alu.n.o_valid & self.busy_o) # ALU done # wr-done, back-to-start latch m.d.sync += rst_l.s.eq(all_rd) # set when read-phase is fully done @@ -323,15 +323,15 @@ class MultiCompUnit(RegSpecALUAPI, Elaboratable): # on a go_read, tell the ALU we're accepting data. m.submodules.alui_l = alui_l = SRLatch(False, name="alui") - m.d.comb += self.alu.p.valid_i.eq(alui_l.q) - m.d.sync += alui_l.r.eq(self.alu.p.ready_o & alui_l.q) + m.d.comb += self.alu.p.i_valid.eq(alui_l.q) + m.d.sync += alui_l.r.eq(self.alu.p.o_ready & alui_l.q) m.d.comb += alui_l.s.eq(all_rd_pulse) # ALU output "ready" side. alu "ready" indication stays hi until # ALU says "valid". 
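# (i.e. the alu_l latch is set by all_rd_pulse, drives n.i_ready
# while set, and is cleared in the sync domain once n.o_valid has
# been seen, so each result is acknowledged exactly once)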
m.submodules.alu_l = alu_l = SRLatch(False, name="alu") - m.d.comb += self.alu.n.ready_i.eq(alu_l.q) - m.d.sync += alu_l.r.eq(self.alu.n.valid_o & alu_l.q) + m.d.comb += self.alu.n.i_ready.eq(alu_l.q) + m.d.sync += alu_l.r.eq(self.alu.n.o_valid & alu_l.q) m.d.comb += alu_l.s.eq(all_rd_pulse) # ----- diff --git a/src/soc/experiment/dcache.py b/src/soc/experiment/dcache.py index b818347b..ce9b8309 100644 --- a/src/soc/experiment/dcache.py +++ b/src/soc/experiment/dcache.py @@ -489,7 +489,7 @@ class DTLBUpdate(Elaboratable): class DCachePendingHit(Elaboratable): def __init__(self, tlb_pte_way, tlb_valid_way, tlb_hit_way, - cache_valid_idx, cache_tag_set, + cache_i_validdx, cache_tag_set, req_addr, hit_set): @@ -505,7 +505,7 @@ class DCachePendingHit(Elaboratable): self.tlb_hit_way = tlb_hit_way self.tlb_pte_way = tlb_pte_way self.tlb_valid_way = tlb_valid_way - self.cache_valid_idx = cache_valid_idx + self.cache_i_validdx = cache_i_validdx self.cache_tag_set = cache_tag_set self.req_addr = req_addr self.hit_set = hit_set @@ -520,7 +520,7 @@ class DCachePendingHit(Elaboratable): is_hit = self.is_hit tlb_pte_way = self.tlb_pte_way tlb_valid_way = self.tlb_valid_way - cache_valid_idx = self.cache_valid_idx + cache_i_validdx = self.cache_i_validdx cache_tag_set = self.cache_tag_set req_addr = self.req_addr tlb_hit_way = self.tlb_hit_way @@ -554,7 +554,7 @@ class DCachePendingHit(Elaboratable): for i in range(NUM_WAYS): # way_t is_tag_hit = Signal(name="is_tag_hit_%d_%d" % (j, i)) - comb += is_tag_hit.eq(go & cache_valid_idx[i] & + comb += is_tag_hit.eq(go & cache_i_validdx[i] & (read_tag(i, cache_tag_set) == s_tag) & tlb_valid_way[j]) with m.If(is_tag_hit): @@ -572,7 +572,7 @@ class DCachePendingHit(Elaboratable): comb += s_tag.eq(get_tag(req_addr)) for i in range(NUM_WAYS): # way_t is_tag_hit = Signal(name="is_tag_hit_%d" % i) - comb += is_tag_hit.eq(go & cache_valid_idx[i] & + comb += is_tag_hit.eq(go & cache_i_validdx[i] & (read_tag(i, cache_tag_set) == s_tag)) with m.If(is_tag_hit): comb += hit_way.eq(i) @@ -872,7 +872,7 @@ class DCache(Elaboratable): nc = Signal() hit_set = Array(Signal(name="hit_set_%d" % i) \ for i in range(TLB_NUM_WAYS)) - cache_valid_idx = Signal(NUM_WAYS) + cache_i_validdx = Signal(NUM_WAYS) # Extract line, row and tag from request comb += req_index.eq(get_index(r0.req.addr)) @@ -884,11 +884,11 @@ class DCache(Elaboratable): r0.req.addr, ra, req_index, req_tag, req_row) comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error) - comb += cache_valid_idx.eq(cache_valids[req_index]) + comb += cache_i_validdx.eq(cache_valids[req_index]) m.submodules.dcache_pend = dc = DCachePendingHit(tlb_pte_way, tlb_valid_way, tlb_hit_way, - cache_valid_idx, cache_tag_set, + cache_i_validdx, cache_tag_set, r0.req.addr, hit_set) diff --git a/src/soc/experiment/formal/proof_alu_fsm.py b/src/soc/experiment/formal/proof_alu_fsm.py index 97f36a8f..8883a985 100644 --- a/src/soc/experiment/formal/proof_alu_fsm.py +++ b/src/soc/experiment/formal/proof_alu_fsm.py @@ -39,19 +39,19 @@ class Driver(Elaboratable): # liveness counter live_cnt = Signal(5) # keep data and valid stable, until accepted - with m.If(Past(dut.p.valid_i) & ~Past(dut.p.ready_o)): + with m.If(Past(dut.p.i_valid) & ~Past(dut.p.o_ready)): comb += [ Assume(Stable(dut.op.sdir)), Assume(Stable(dut.p.data_i.data)), Assume(Stable(dut.p.data_i.shift)), - Assume(Stable(dut.p.valid_i)), + Assume(Stable(dut.p.i_valid)), ] # force reading the output in a reasonable time, # necessary to pass induction - with m.If(Past(dut.n.valid_o) & 
~Past(dut.n.ready_i)): - comb += Assume(dut.n.ready_i) + with m.If(Past(dut.n.o_valid) & ~Past(dut.n.i_ready)): + comb += Assume(dut.n.i_ready) # capture transferred input data - with m.If(dut.p.ready_o & dut.p.valid_i): + with m.If(dut.p.o_ready & dut.p.i_valid): sync += [ data_i.eq(dut.p.data_i.data), shift_i.eq(dut.p.data_i.shift), @@ -71,18 +71,18 @@ class Driver(Elaboratable): # one work item ever in flight at any given time. # Whenever the unit is busy (not ready) the read and write counters # will differ by exactly one unit. - m.d.comb += Assert((read_cnt + ~dut.p.ready_o) & 0xF == write_cnt) + m.d.comb += Assert((read_cnt + ~dut.p.o_ready) & 0xF == write_cnt) # Check for liveness. It will ensure that the FSM is not stuck, and # will eventually produce some result. - # In this case, the delay between ready_o being negated and valid_o + # In this case, the delay between o_ready being negated and o_valid # being asserted has to be less than 16 cycles. - with m.If(~dut.p.ready_o & ~dut.n.valid_o): + with m.If(~dut.p.o_ready & ~dut.n.o_valid): m.d.sync += live_cnt.eq(live_cnt + 1) with m.Else(): m.d.sync += live_cnt.eq(0) m.d.comb += Assert(live_cnt < 16) # check coverage as output data is accepted - with m.If(dut.n.ready_i & dut.n.valid_o): + with m.If(dut.n.i_ready & dut.n.o_valid): # increment read counter sync += read_cnt.eq(read_cnt + 1) # check result @@ -123,9 +123,9 @@ class ALUFSMTestCase(FHDLTestCase): traces = [ 'clk', 'p_data_i[7:0]', 'p_shift_i[7:0]', 'op__sdir', - 'p_valid_i', 'p_ready_o', + 'p_i_valid', 'p_o_ready', 'n_data_o[7:0]', - 'n_valid_o', 'n_ready_i', + 'n_o_valid', 'n_i_ready', ('formal', {'module': 'top'}, [ 'write_cnt[3:0]', 'read_cnt[3:0]', 'cov[7:0]' ]) diff --git a/src/soc/experiment/imem.py b/src/soc/experiment/imem.py index 3a9a1bc8..177e238c 100644 --- a/src/soc/experiment/imem.py +++ b/src/soc/experiment/imem.py @@ -23,7 +23,7 @@ class TestMemFetchUnit(FetchUnitInterface, Elaboratable): m.submodules.mem = mem = self.mem do_fetch = Signal() # set when fetch while valid and not stalled - m.d.comb += do_fetch.eq(self.a_valid_i & ~self.a_stall_i) + m.d.comb += do_fetch.eq(self.a_i_valid & ~self.a_stall_i) # bit of a messy FSM that progresses from idle to in progress # to done. @@ -37,7 +37,7 @@ class TestMemFetchUnit(FetchUnitInterface, Elaboratable): with m.If(~do_fetch): # done m.d.sync += op_in_progress.eq(0) - m.d.comb += self.a_busy_o.eq(op_actioned & self.a_valid_i) + m.d.comb += self.a_busy_o.eq(op_actioned & self.a_i_valid) # fetch m.d.comb += mem.rdport.addr.eq(self.a_pc_i[adr_lsb:]) m.d.comb += self.f_instr_o.eq(mem.rdport.data) diff --git a/src/soc/experiment/lsmem.py b/src/soc/experiment/lsmem.py index 08764232..11a1ba81 100644 --- a/src/soc/experiment/lsmem.py +++ b/src/soc/experiment/lsmem.py @@ -19,8 +19,8 @@ class TestMemLoadStoreUnit(LoadStoreUnitInterface, Elaboratable): do_store = Signal() # set when store while valid and not stalled m.d.comb += [ - do_load.eq(self.x_ld_i & (self.x_valid_i & ~self.x_stall_i)), - do_store.eq(self.x_st_i & (self.x_valid_i & ~self.x_stall_i)), + do_load.eq(self.x_ld_i & (self.x_i_valid & ~self.x_stall_i)), + do_store.eq(self.x_st_i & (self.x_i_valid & ~self.x_stall_i)), ] # bit of a messy FSM that progresses from idle to in progress # to done. 
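For reference, the renamed x_* signals are driven from a testbench exactly as in test_loadstore.py above: present the request with x_i_valid high, then poll x_busy_o before collecting the data. A minimal sketch under those assumptions (the helper name load_word is illustrative, not part of this patch):

    def load_word(dut, addr):
        # present a load request, marked valid via the renamed x_i_valid
        yield dut.x_addr_i.eq(addr)
        yield dut.x_ld_i.eq(1)
        yield dut.x_i_valid.eq(1)
        yield
        yield dut.x_ld_i.eq(0)
        # wait for the memory to go non-busy, then collect the loaded data
        while (yield dut.x_busy_o):
            yield
        data = (yield dut.m_ld_data_o)
        yield dut.x_i_valid.eq(0)
        return data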
@@ -34,7 +34,7 @@ class TestMemLoadStoreUnit(LoadStoreUnitInterface, Elaboratable): with m.If(~(do_load | do_store)): # done m.d.sync += op_in_progress.eq(0) - m.d.comb += self.x_busy_o.eq(op_actioned & self.x_valid_i) + m.d.comb += self.x_busy_o.eq(op_actioned & self.x_i_valid) m.d.comb += [ # load diff --git a/src/soc/experiment/pi2ls.py b/src/soc/experiment/pi2ls.py index 2392620b..751d2551 100644 --- a/src/soc/experiment/pi2ls.py +++ b/src/soc/experiment/pi2ls.py @@ -11,7 +11,7 @@ busy_o/1 most likely to be x_busy_o go_die_i/1 rst? addr.data/48 x_addr_i (x_addr_i[:4] goes into LenExpand) - addr.ok/1 probably x_valid_i & ~x_stall_i + addr.ok/1 probably x_i_valid & ~x_stall_i addr_ok_o/1 no equivalent. *might* work using x_stall_i exc_o/6(?) m_load_err_o and m_store_err_o @@ -100,8 +100,8 @@ class Pi2LSUI(PortInterfaceBase): m.next = "IDLE" # indicate valid at both ends. OR with lsui_busy (stops comb loop) - m.d.comb += self.lsui.m_valid_i.eq(self.valid_l.q ) - m.d.comb += self.lsui.x_valid_i.eq(self.valid_l.q ) + m.d.comb += self.lsui.m_i_valid.eq(self.valid_l.q ) + m.d.comb += self.lsui.x_i_valid.eq(self.valid_l.q ) # reset the valid latch when not busy. sync to stop loop lsui_active = Signal() @@ -154,7 +154,7 @@ class Pi2LSUI1(Elaboratable): # expand the LSBs of address plus LD/ST len into 16-bit mask m.d.comb += lsui.x_mask_i.eq(lenexp.lexp_o) # pass through the address, indicate "valid" - m.d.comb += lsui.x_valid_i.eq(1) + m.d.comb += lsui.x_i_valid.eq(1) # indicate "OK" - XXX should be checking address valid m.d.comb += pi.addr_ok_o.eq(1) diff --git a/src/soc/experiment/score6600.py b/src/soc/experiment/score6600.py index f5366961..13ecff6b 100644 --- a/src/soc/experiment/score6600.py +++ b/src/soc/experiment/score6600.py @@ -736,7 +736,7 @@ class IssueToScoreboard(Elaboratable): mqbits = unsigned(int(log(qlen) / log(2))+2) self.p_add_i = Signal(mqbits) # instructions to add (from data_i) - self.p_ready_o = Signal() # instructions were added + self.p_o_ready = Signal() # instructions were added self.data_i = Instruction._nq(n_in, "data_i") self.busy_o = Signal(reset_less=True) # at least one CU is busy @@ -762,7 +762,7 @@ class IssueToScoreboard(Elaboratable): # link up instruction queue comb += iq.p_add_i.eq(self.p_add_i) - comb += self.p_ready_o.eq(iq.p_ready_o) + comb += self.p_o_ready.eq(iq.p_o_ready) for i in range(self.n_in): comb += eq(iq.data_i[i], self.data_i[i]) @@ -839,7 +839,7 @@ class IssueToScoreboard(Elaboratable): return m def __iter__(self): - yield self.p_ready_o + yield self.p_o_ready for o in self.data_i: yield from list(o) yield self.p_add_i @@ -859,10 +859,10 @@ def power_instr_q(dut, pdecode2, ins, code): print("senddata ", idx, insn_type, fn_unit, instr) yield dut.p_add_i.eq(sendlen) yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready while not o_p_ready: yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready yield dut.p_add_i.eq(0) @@ -895,10 +895,10 @@ def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest, print("senddata %d %x" % (idx, di)) yield dut.p_add_i.eq(sendlen) yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready while not o_p_ready: yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready yield dut.p_add_i.eq(0) diff --git a/src/soc/experiment/score6600_multi.py b/src/soc/experiment/score6600_multi.py index 85b8b45c..ff35b676 100644 --- a/src/soc/experiment/score6600_multi.py +++ b/src/soc/experiment/score6600_multi.py @@ -789,7 +789,7 @@ class 
IssueToScoreboard(Elaboratable): mqbits = unsigned(int(log(qlen) / log(2))+2) self.p_add_i = Signal(mqbits) # instructions to add (from data_i) - self.p_ready_o = Signal() # instructions were added + self.p_o_ready = Signal() # instructions were added self.data_i = Instruction._nq(n_in, "data_i") self.busy_o = Signal(reset_less=True) # at least one CU is busy @@ -815,7 +815,7 @@ class IssueToScoreboard(Elaboratable): # link up instruction queue comb += iq.p_add_i.eq(self.p_add_i) - comb += self.p_ready_o.eq(iq.p_ready_o) + comb += self.p_o_ready.eq(iq.p_o_ready) for i in range(self.n_in): comb += eq(iq.data_i[i], self.data_i[i]) @@ -885,7 +885,7 @@ class IssueToScoreboard(Elaboratable): return m def __iter__(self): - yield self.p_ready_o + yield self.p_o_ready for o in self.data_i: yield from list(o) yield self.p_add_i @@ -905,10 +905,10 @@ def power_instr_q(dut, pdecode2, ins, code): print("senddata ", idx, insn_type, fn_unit, instr) yield dut.p_add_i.eq(sendlen) yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready while not o_p_ready: yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready yield dut.p_add_i.eq(0) @@ -941,10 +941,10 @@ def instr_q(dut, op, funit, op_imm, imm, src1, src2, dest, print("senddata %d %x" % (idx, di)) yield dut.p_add_i.eq(sendlen) yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready while not o_p_ready: yield - o_p_ready = yield dut.p_ready_o + o_p_ready = yield dut.p_o_ready yield dut.p_add_i.eq(0) diff --git a/src/soc/experiment/test/test_compalu_multi.py b/src/soc/experiment/test/test_compalu_multi.py index 61b3977b..4c2e1347 100644 --- a/src/soc/experiment/test/test_compalu_multi.py +++ b/src/soc/experiment/test/test_compalu_multi.py @@ -527,11 +527,11 @@ def test_compunit_fsm(): ('prev port', 'in', [ 'op__sdir', 'p_data_i[7:0]', 'p_shift_i[7:0]', ({'submodule': 'p'}, - ['p_valid_i', 'p_ready_o'])]), + ['p_i_valid', 'p_o_ready'])]), ('next port', 'out', [ 'n_data_o[7:0]', ({'submodule': 'n'}, - ['n_valid_o', 'n_ready_i'])])]), + ['n_o_valid', 'n_i_ready'])])]), ('debug', {'module': 'top'}, ['src1_count[7:0]', 'src2_count[7:0]', 'dest1_count[7:0]'])] @@ -657,9 +657,9 @@ def test_compunit_regspec3(): ('alu', {'submodule': 'alu'}, [ ('prev port', 'in', [ 'oper_i_None__insn_type', 'i1[15:0]', - 'valid_i', 'ready_o']), + 'i_valid', 'o_ready']), ('next port', 'out', [ - 'alu_o[15:0]', 'valid_o', 'ready_i'])])] + 'alu_o[15:0]', 'o_valid', 'i_ready'])])] write_gtkw("test_compunit_regspec3.gtkw", "test_compunit_regspec3.vcd", @@ -732,9 +732,9 @@ def test_compunit_regspec1(): ('alu', {'submodule': 'alu'}, [ ('prev port', 'in', [ 'op__insn_type', 'op__invert_in', 'a[15:0]', 'b[15:0]', - 'valid_i', 'ready_o']), + 'i_valid', 'o_ready']), ('next port', 'out', [ - 'alu_o[15:0]', 'valid_o', 'ready_i', + 'alu_o[15:0]', 'o_valid', 'i_ready', 'alu_o_ok', 'alu_cr_ok'])]), ('debug', {'module': 'top'}, ['src1_count[7:0]', 'src2_count[7:0]', 'dest1_count[7:0]'])] diff --git a/src/soc/fu/compunits/formal/proof_fu.py b/src/soc/fu/compunits/formal/proof_fu.py index 0af6ea64..e8a5e50f 100644 --- a/src/soc/fu/compunits/formal/proof_fu.py +++ b/src/soc/fu/compunits/formal/proof_fu.py @@ -216,7 +216,7 @@ class FUTestCase(FHDLTestCase): ('alu', {'submodule': 'alu'}, [ ('prev port', 'in', [ 'oper_i_None__insn_type', 'i1[15:0]', - 'valid_i', 'ready_o']), + 'i_valid', 'o_ready']), ('next port', 'out', [ 'alu_o[15:0]', 'valid_o', 'ready_i'])])] diff --git a/src/soc/fu/div/fsm.py b/src/soc/fu/div/fsm.py index 2a78b19a..afb11174 
100644 --- a/src/soc/fu/div/fsm.py +++ b/src/soc/fu/div/fsm.py @@ -167,22 +167,22 @@ class FSMDivCoreStage(ControlBase): rem_start = remainder_fract_width - dividend_fract_width m.d.comb += core_o.remainder.eq(self.div_state_next.o.remainder << rem_start) - m.d.comb += self.n.valid_o.eq( + m.d.comb += self.n.o_valid.eq( ~self.empty & self.saved_state.will_be_done_after(1)) - m.d.comb += self.p.ready_o.eq(self.empty) + m.d.comb += self.p.o_ready.eq(self.empty) m.d.sync += self.saved_state.eq(self.div_state_next.o) with m.If(self.empty): m.d.comb += self.div_state_next.i.eq(self.div_state_init.o) m.d.comb += self.div_state_next.divisor.eq(core_i.divisor_radicand) - with m.If(self.p.valid_i): + with m.If(self.p.i_valid): m.d.sync += self.empty.eq(0) m.d.sync += self.saved_input_data.eq(data_i) with m.Else(): m.d.comb += [ self.div_state_next.i.eq(self.saved_state), self.div_state_next.divisor.eq(core_saved_i.divisor_radicand)] - with m.If(self.n.ready_i & self.n.valid_o): + with m.If(self.n.i_ready & self.n.o_valid): m.d.sync += self.empty.eq(1) return m diff --git a/src/soc/fu/mmu/fsm.py b/src/soc/fu/mmu/fsm.py index 1a3026e7..272c4638 100644 --- a/src/soc/fu/mmu/fsm.py +++ b/src/soc/fu/mmu/fsm.py @@ -98,8 +98,8 @@ class FSMMMUStage(ControlBase): # busy/done signals busy = Signal() done = Signal() - m.d.comb += self.n.valid_o.eq(busy & done) - m.d.comb += self.p.ready_o.eq(~busy) + m.d.comb += self.n.o_valid.eq(busy & done) + m.d.comb += self.p.o_ready.eq(~busy) # take copy of X-Form SPR field x_fields = self.fields.FormXFX @@ -118,7 +118,7 @@ class FSMMMUStage(ControlBase): m.d.comb += blip.eq(rising_edge(m, valid)) with m.If(~busy): - with m.If(self.p.valid_i): + with m.If(self.p.i_valid): sync += busy.eq(1) with m.Else(): @@ -216,7 +216,7 @@ class FSMMMUStage(ControlBase): with m.Case(MicrOp.OP_ILLEGAL): comb += self.illegal.eq(1) - with m.If(self.n.ready_i & self.n.valid_o): + with m.If(self.n.i_ready & self.n.o_valid): sync += busy.eq(0) return m diff --git a/src/soc/minerva/units/fetch.py b/src/soc/minerva/units/fetch.py index a1f14b3d..1f2629a2 100644 --- a/src/soc/minerva/units/fetch.py +++ b/src/soc/minerva/units/fetch.py @@ -23,9 +23,9 @@ class FetchUnitInterface: # inputs: address to fetch PC, and valid/stall signalling self.a_pc_i = Signal(self.addr_wid) self.a_stall_i = Signal() - self.a_valid_i = Signal() + self.a_i_valid = Signal() self.f_stall_i = Signal() - self.f_valid_i = Signal() + self.f_i_valid = Signal() # outputs: instruction (or error), and busy indicators self.a_busy_o = Signal() @@ -45,9 +45,9 @@ class FetchUnitInterface: def __iter__(self): yield self.a_pc_i yield self.a_stall_i - yield self.a_valid_i + yield self.a_i_valid yield self.f_stall_i - yield self.f_valid_i + yield self.f_i_valid yield self.a_busy_o yield self.f_busy_o yield self.f_instr_o @@ -68,14 +68,14 @@ class BareFetchUnit(FetchUnitInterface, Elaboratable): ibus_rdata = Signal.like(self.ibus.dat_r) with m.If(self.ibus.cyc): - with m.If(self.ibus.ack | self.ibus.err | ~self.f_valid_i): + with m.If(self.ibus.ack | self.ibus.err | ~self.f_i_valid): m.d.sync += [ self.ibus.cyc.eq(0), self.ibus.stb.eq(0), self.ibus.sel.eq(0), ibus_rdata.eq(self.ibus.dat_r) ] - with m.Elif(self.a_valid_i & ~self.a_stall_i): + with m.Elif(self.a_i_valid & ~self.a_stall_i): m.d.sync += [ self.ibus.adr.eq(self.a_pc_i[self.adr_lsbs:]), self.ibus.cyc.eq(1), @@ -153,11 +153,11 @@ class CachedFetchUnit(FetchUnitInterface, Elaboratable): icache.s1_addr.eq(self.a_pc_i[self.adr_lsbs:]), icache.s1_flush.eq(self.a_flush), 
icache.s1_stall.eq(self.a_stall_i), - icache.s1_valid.eq(self.a_valid_i & a_icache_select), + icache.s1_valid.eq(self.a_i_valid & a_icache_select), icache.s2_addr.eq(self.f_pc[self.adr_lsbs:]), icache.s2_re.eq(Const(1)), icache.s2_evict.eq(Const(0)), - icache.s2_valid.eq(self.f_valid_i & f_icache_select) + icache.s2_valid.eq(self.f_i_valid & f_icache_select) ] iba = WishboneArbiter(self.pspec) @@ -180,14 +180,14 @@ class CachedFetchUnit(FetchUnitInterface, Elaboratable): bare_port = iba.port(priority=1) bare_rdata = Signal.like(bare_port.dat_r) with m.If(bare_port.cyc): - with m.If(bare_port.ack | bare_port.err | ~self.f_valid_i): + with m.If(bare_port.ack | bare_port.err | ~self.f_i_valid): m.d.sync += [ bare_port.cyc.eq(0), bare_port.stb.eq(0), bare_port.sel.eq(0), bare_rdata.eq(bare_port.dat_r) ] - with m.Elif(~a_icache_select & self.a_valid_i & ~self.a_stall_i): + with m.Elif(~a_icache_select & self.a_i_valid & ~self.a_stall_i): m.d.sync += [ bare_port.cyc.eq(1), bare_port.stb.eq(1), diff --git a/src/soc/minerva/units/loadstore.py b/src/soc/minerva/units/loadstore.py index e9830c7a..363c44ff 100644 --- a/src/soc/minerva/units/loadstore.py +++ b/src/soc/minerva/units/loadstore.py @@ -54,11 +54,11 @@ class LoadStoreUnitInterface: self.x_st_data_i = Signal(data_wid) # The data to write when storing self.x_stall_i = Signal() # do nothing until low - self.x_valid_i = Signal() # Whether x pipeline stage is + self.x_i_valid = Signal() # Whether x pipeline stage is # currently enabled (I # think?). Set to 1 for #now self.m_stall_i = Signal() # do nothing until low - self.m_valid_i = Signal() # Whether m pipeline stage is + self.m_i_valid = Signal() # Whether m pipeline stage is # currently enabled. Set # to 1 for now @@ -67,7 +67,7 @@ class LoadStoreUnitInterface: self.m_busy_o = Signal() # set when the memory is busy self.m_ld_data_o = Signal(data_wid) # Data returned from memory read - # Data validity is NOT indicated by m_valid_i or x_valid_i as + # Data validity is NOT indicated by m_i_valid or x_i_valid as # those are inputs. 
I believe it is valid on the next cycle # after raising m_load where busy is low @@ -84,9 +84,9 @@ class LoadStoreUnitInterface: yield self.x_st_data_i yield self.x_stall_i - yield self.x_valid_i + yield self.x_i_valid yield self.m_stall_i - yield self.m_valid_i + yield self.m_i_valid yield self.x_busy_o yield self.m_busy_o yield self.m_ld_data_o @@ -111,7 +111,7 @@ class BareLoadStoreUnit(LoadStoreUnitInterface, Elaboratable): with m.If(self.jtag_en): # for safety, JTAG can completely disable WB with m.If(self.dbus.cyc): - with m.If(self.dbus.ack | self.dbus.err | ~self.m_valid_i): + with m.If(self.dbus.ack | self.dbus.err | ~self.m_i_valid): m.d.sync += [ self.dbus.cyc.eq(0), self.dbus.stb.eq(0), @@ -119,7 +119,7 @@ class BareLoadStoreUnit(LoadStoreUnitInterface, Elaboratable): self.m_ld_data_o.eq(self.dbus.dat_r) ] with m.Elif((self.x_ld_i | self.x_st_i) & - self.x_valid_i & ~self.x_stall_i): + self.x_i_valid & ~self.x_stall_i): m.d.sync += [ self.dbus.cyc.eq(1), self.dbus.stb.eq(1), @@ -207,11 +207,11 @@ class CachedLoadStoreUnit(LoadStoreUnitInterface, Elaboratable): dcache.s1_addr.eq(self.x_addr_i[self.adr_lsbs:]), dcache.s1_flush.eq(self.x_flush), dcache.s1_stall.eq(self.x_stall_i), - dcache.s1_valid.eq(self.x_valid_i & x_dcache_select), + dcache.s1_valid.eq(self.x_i_valid & x_dcache_select), dcache.s2_addr.eq(m_addr[self.adr_lsbs:]), dcache.s2_re.eq(self.m_load), dcache.s2_evict.eq(self.m_store), - dcache.s2_valid.eq(self.m_valid_i & m_dcache_select) + dcache.s2_valid.eq(self.m_i_valid & m_dcache_select) ] wrbuf_w_data = Record([("addr", self.addr_wid-self.adr_lsbs), @@ -225,7 +225,7 @@ class CachedLoadStoreUnit(LoadStoreUnitInterface, Elaboratable): wrbuf_w_data.addr.eq(self.x_addr_i[self.adr_lsbs:]), wrbuf_w_data.mask.eq(self.x_mask_i), wrbuf_w_data.data.eq(self.x_st_data_i), - wrbuf.w_en.eq(self.x_st_i & self.x_valid_i & + wrbuf.w_en.eq(self.x_st_i & self.x_i_valid & x_dcache_select & ~self.x_stall_i), wrbuf_r_data.eq(wrbuf.r_data), ] @@ -267,14 +267,14 @@ class CachedLoadStoreUnit(LoadStoreUnitInterface, Elaboratable): bare_port = dba.port(priority=2) bare_rdata = Signal.like(bare_port.dat_r) with m.If(bare_port.cyc): - with m.If(bare_port.ack | bare_port.err | ~self.m_valid_i): + with m.If(bare_port.ack | bare_port.err | ~self.m_i_valid): m.d.sync += [ bare_port.cyc.eq(0), bare_port.stb.eq(0), bare_rdata.eq(bare_port.dat_r) ] with m.Elif((self.x_ld_i | self.x_st_i) & - ~x_dcache_select & self.x_valid_i & ~self.x_stall_i): + ~x_dcache_select & self.x_i_valid & ~self.x_stall_i): m.d.sync += [ bare_port.cyc.eq(1), bare_port.stb.eq(1), diff --git a/src/soc/scoreboard/addr_split.py b/src/soc/scoreboard/addr_split.py index 3d197f2e..c015599d 100644 --- a/src/soc/scoreboard/addr_split.py +++ b/src/soc/scoreboard/addr_split.py @@ -27,19 +27,19 @@ class LDLatch(Elaboratable): def __init__(self, dwidth, awidth, mlen): self.addr_i = Signal(awidth, reset_less=True) self.mask_i = Signal(mlen, reset_less=True) - self.valid_i = Signal(reset_less=True) + self.i_valid = Signal(reset_less=True) self.ld_i = LDData(dwidth, "ld_i") self.ld_o = LDData(dwidth, "ld_o") - self.valid_o = Signal(reset_less=True) + self.o_valid = Signal(reset_less=True) def elaborate(self, platform): m = Module() comb = m.d.comb m.submodules.in_l = in_l = SRLatch(sync=False, name="in_l") - comb += in_l.s.eq(self.valid_i) - comb += self.valid_o.eq(in_l.q & self.valid_i) - latchregister(m, self.ld_i, self.ld_o, in_l.q & self.valid_o, "ld_i_r") + comb += in_l.s.eq(self.i_valid) + comb += self.o_valid.eq(in_l.q & 
self.i_valid) + latchregister(m, self.ld_i, self.ld_o, in_l.q & self.o_valid, "ld_i_r") return m @@ -50,8 +50,8 @@ class LDLatch(Elaboratable): yield self.ld_i.data yield self.ld_o.err yield self.ld_o.data - yield self.valid_i - yield self.valid_o + yield self.i_valid + yield self.o_valid def ports(self): return list(self) @@ -83,8 +83,8 @@ class LDSTSplitter(Elaboratable): self.addr_i = Signal(awidth, reset_less=True) # no match in PortInterface self.len_i = Signal(dlen, reset_less=True) - self.valid_i = Signal(reset_less=True) - self.valid_o = Signal(reset_less=True) + self.i_valid = Signal(reset_less=True) + self.o_valid = Signal(reset_less=True) self.is_ld_i = Signal(reset_less=True) self.is_st_i = Signal(reset_less=True) @@ -95,13 +95,13 @@ class LDSTSplitter(Elaboratable): self.exc = Signal(reset_less=True) # pi.exc TODO # TODO : create/connect two outgoing port interfaces - self.sld_valid_o = Signal(2, reset_less=True) - self.sld_valid_i = Signal(2, reset_less=True) + self.sld_o_valid = Signal(2, reset_less=True) + self.sld_i_valid = Signal(2, reset_less=True) self.sld_data_i = Array((LDData(cline_wid, "ld_data_i1"), LDData(cline_wid, "ld_data_i2"))) - self.sst_valid_o = Signal(2, reset_less=True) - self.sst_valid_i = Signal(2, reset_less=True) + self.sst_o_valid = Signal(2, reset_less=True) + self.sst_i_valid = Signal(2, reset_less=True) self.sst_data_o = Array((LDData(cline_wid, "st_data_i1"), LDData(cline_wid, "st_data_i2"))) @@ -151,22 +151,22 @@ class LDSTSplitter(Elaboratable): # set up connections to LD-split. note: not active if mask is zero for i, (ld, mask) in enumerate(((ld1, mask1), (ld2, mask2))): - ld_valid = Signal(name="ldvalid_i%d" % i, reset_less=True) - comb += ld_valid.eq(self.valid_i & self.sld_valid_i[i]) - comb += ld.valid_i.eq(ld_valid & (mask != mzero)) + ld_valid = Signal(name="ldi_valid%d" % i, reset_less=True) + comb += ld_valid.eq(self.i_valid & self.sld_i_valid[i]) + comb += ld.i_valid.eq(ld_valid & (mask != mzero)) comb += ld.ld_i.eq(self.sld_data_i[i]) - comb += self.sld_valid_o[i].eq(ld.valid_o) + comb += self.sld_o_valid[i].eq(ld.o_valid) # sort out valid: mask2 zero we ignore 2nd LD with m.If(mask2 == mzero): - comb += self.valid_o.eq(self.sld_valid_o[0]) + comb += self.o_valid.eq(self.sld_o_valid[0]) with m.Else(): - comb += self.valid_o.eq(self.sld_valid_o.all()) + comb += self.o_valid.eq(self.sld_o_valid.all()) ## debug output -- output mask2 and mzero ## guess second port is invalid # all bits valid (including when data error occurs!) 
decode ld1/ld2 - with m.If(self.valid_o): + with m.If(self.o_valid): # errors cause error condition comb += self.ld_data_o.err.eq(ld1.ld_o.err | ld2.ld_o.err) @@ -179,10 +179,10 @@ class LDSTSplitter(Elaboratable): # set busy flag -- required for unit test for i, (ld, mask) in enumerate(((ld1, mask1), (ld2, mask2))): - valid = Signal(name="stvalid_i%d" % i, reset_less=True) - comb += valid.eq(self.valid_i & self.sst_valid_i[i]) - comb += ld.valid_i.eq(valid & (mask != mzero)) - comb += self.sld_valid_o[i].eq(ld.valid_o) + valid = Signal(name="sti_valid%d" % i, reset_less=True) + comb += valid.eq(self.i_valid & self.sst_i_valid[i]) + comb += ld.i_valid.eq(valid & (mask != mzero)) + comb += self.sld_o_valid[i].eq(ld.o_valid) comb += self.sst_data_o[i].data.eq(ld.ld_o.data) comb += ld1.ld_i.eq((self.st_data_i << (ashift1*8)) & mask1) @@ -190,12 +190,12 @@ class LDSTSplitter(Elaboratable): # sort out valid: mask2 zero we ignore 2nd LD with m.If(mask2 == mzero): - comb += self.valid_o.eq(self.sst_valid_o[0]) + comb += self.o_valid.eq(self.sst_o_valid[0]) with m.Else(): - comb += self.valid_o.eq(self.sst_valid_o.all()) + comb += self.o_valid.eq(self.sst_o_valid.all()) # all bits valid (including when data error occurs!) decode ld1/ld2 - with m.If(self.valid_o): + with m.If(self.o_valid): # errors cause error condition comb += self.st_data_i.err.eq(ld1.ld_o.err | ld2.ld_o.err) @@ -207,9 +207,9 @@ class LDSTSplitter(Elaboratable): yield self.is_ld_i yield self.ld_data_o.err yield self.ld_data_o.data - yield self.valid_i - yield self.valid_o - yield self.sld_valid_i + yield self.i_valid + yield self.o_valid + yield self.sld_i_valid for i in range(2): yield self.sld_data_i[i].err yield self.sld_data_i[i].data @@ -247,11 +247,11 @@ def sim(dut): yield dut.is_ld_i.eq(1) yield dut.len_i.eq(ld_len) yield dut.addr_i.eq(addr) - yield dut.valid_i.eq(1) + yield dut.i_valid.eq(1) print("waiting") while True: - valid_o = yield dut.valid_o - if valid_o: + o_valid = yield dut.o_valid + if o_valid: break yield exc = yield dut.exc @@ -267,8 +267,8 @@ def sim(dut): def lds(): print("lds") while True: - valid_i = yield dut.valid_i - if valid_i: + i_valid = yield dut.i_valid + if i_valid: break yield @@ -281,10 +281,10 @@ def sim(dut): data2 = (shfdata >> 128) & dmask1 print("ld data2", 1 << dlen, hex(data >> (1 << dlen)), hex(data2)) yield dut.sld_data_i[0].data.eq(data1) - yield dut.sld_valid_i[0].eq(1) + yield dut.sld_i_valid[0].eq(1) yield yield dut.sld_data_i[1].data.eq(data2) - yield dut.sld_valid_i[1].eq(1) + yield dut.sld_i_valid[1].eq(1) yield sim.add_sync_process(lds) diff --git a/src/soc/scoreboard/fn_unit.py b/src/soc/scoreboard/fn_unit.py index 8b55eed1..d0e7004c 100644 --- a/src/soc/scoreboard/fn_unit.py +++ b/src/soc/scoreboard/fn_unit.py @@ -29,7 +29,7 @@ class FnUnit(Elaboratable): * dest_i / src1_i / src2_i are in *binary*, whereas... * ...g_rd_pend_i / g_wr_pend_i and rd_pend_o / wr_pend_o are UNARY * req_rel_i (request release) is the direct equivalent of pipeline - "output valid" (valid_o) + "output valid" (o_valid) * recover is a local python variable (actually go_die_o) * when shadow_wid = 0, recover and shadown are Consts (i.e. 
do nothing) * wr_pend is set False for the majority of uses: however for diff --git a/src/soc/scoreboard/instruction_q.py b/src/soc/scoreboard/instruction_q.py index 9c3d58d8..4b256654 100644 --- a/src/soc/scoreboard/instruction_q.py +++ b/src/soc/scoreboard/instruction_q.py @@ -47,7 +47,7 @@ class InstructionQ(Elaboratable): mqbits = (int(log(iqlen) / log(2))+2, False) self.p_add_i = Signal(mqbits) # instructions to add (from data_i) - self.p_ready_o = Signal() # instructions were added + self.p_o_ready = Signal() # instructions were added self.data_i = Instruction._nq(n_in, "data_i") self.data_o = Instruction._nq(n_out, "data_o") @@ -91,7 +91,7 @@ class InstructionQ(Elaboratable): comb += left.eq(self.qlen_o) # - self.n_sub_o) comb += spare.eq(mqlen - self.p_add_i) comb += qmaxed.eq(left <= spare) - comb += self.p_ready_o.eq(qmaxed & (self.p_add_i != 0)) + comb += self.p_o_ready.eq(qmaxed & (self.p_add_i != 0)) # put q (flattened) into output for i in range(self.n_out): @@ -103,7 +103,7 @@ class InstructionQ(Elaboratable): # ok now the end's moved sync += end_q.eq(end_q + self.n_sub_o) - with m.If(self.p_ready_o): + with m.If(self.p_o_ready): # copy in the input... insanely gate-costly... *sigh*... for i in range(self.n_in): with m.If(self.p_add_i > Const(i, len(self.p_add_i))): @@ -112,7 +112,7 @@ class InstructionQ(Elaboratable): sync += self.q[ipos].eq(cat(self.data_i[i])) sync += start_q.eq(start_q + self.p_add_i) - with m.If(self.p_ready_o): + with m.If(self.p_o_ready): # update the queue length add2 = Signal(mqbits+1) comb += add2.eq(self.qlen_o + self.p_add_i) @@ -125,7 +125,7 @@ class InstructionQ(Elaboratable): def __iter__(self): yield from self.q - yield self.p_ready_o + yield self.p_o_ready for o in self.data_i: yield from list(o) yield self.p_add_i diff --git a/src/soc/scoreboard/test_iq.py b/src/soc/scoreboard/test_iq.py index fb67d263..acd1b7af 100644 --- a/src/soc/scoreboard/test_iq.py +++ b/src/soc/scoreboard/test_iq.py @@ -34,10 +34,10 @@ class IQSim: self.oq.append(di) yield self.dut.p_add_i.eq(sendlen) yield - o_p_ready = yield self.dut.p_ready_o + o_p_ready = yield self.dut.p_o_ready while not o_p_ready: yield - o_p_ready = yield self.dut.p_ready_o + o_p_ready = yield self.dut.p_o_ready yield self.dut.p_add_i.eq(0) diff --git a/src/soc/simple/core.py b/src/soc/simple/core.py index 84555c37..e9eb1a18 100644 --- a/src/soc/simple/core.py +++ b/src/soc/simple/core.py @@ -117,7 +117,7 @@ class NonProductionCore(Elaboratable): self.sv_pred_dm = Signal() # TODO: SIMD width # issue/valid/busy signalling - self.ivalid_i = Signal(reset_less=True) # instruction is valid + self.ii_valid = Signal(reset_less=True) # instruction is valid self.issue_i = Signal(reset_less=True) self.busy_o = Signal(name="corebusy_o", reset_less=True) @@ -226,7 +226,7 @@ class NonProductionCore(Elaboratable): sync += counter.eq(counter - 1) comb += self.busy_o.eq(1) - with m.If(self.ivalid_i): # run only when valid + with m.If(self.ii_valid): # run only when valid with m.Switch(self.e.do.insn_type): # check for ATTN: halt if true with m.Case(MicrOp.OP_ATTN): diff --git a/src/soc/simple/issuer.py b/src/soc/simple/issuer.py index feef6b62..5d5c1ff2 100644 --- a/src/soc/simple/issuer.py +++ b/src/soc/simple/issuer.py @@ -281,8 +281,8 @@ class TestIssuerInternal(Elaboratable): self.dstmask = Signal(64) def fetch_fsm(self, m, core, pc, svstate, nia, is_svp64_mode, - fetch_pc_ready_o, fetch_pc_valid_i, - fetch_insn_valid_o, fetch_insn_ready_i): + fetch_pc_o_ready, fetch_pc_i_valid, + fetch_insn_o_valid, 
fetch_insn_i_ready): """fetch FSM this FSM performs fetch of raw instruction data, partial-decodes @@ -301,15 +301,15 @@ class TestIssuerInternal(Elaboratable): # waiting (zzz) with m.State("IDLE"): - comb += fetch_pc_ready_o.eq(1) - with m.If(fetch_pc_valid_i): + comb += fetch_pc_o_ready.eq(1) + with m.If(fetch_pc_i_valid): # instruction allowed to go: start by reading the PC # capture the PC and also drop it into Insn Memory # we have joined a pair of combinatorial memory # lookups together. this is Generally Bad. comb += self.imem.a_pc_i.eq(pc) - comb += self.imem.a_valid_i.eq(1) - comb += self.imem.f_valid_i.eq(1) + comb += self.imem.a_i_valid.eq(1) + comb += self.imem.f_i_valid.eq(1) sync += cur_state.pc.eq(pc) sync += cur_state.svstate.eq(svstate) # and svstate @@ -327,8 +327,8 @@ class TestIssuerInternal(Elaboratable): sync += cur_state.msr.eq(self.state_r_msr.data_o) with m.If(self.imem.f_busy_o): # zzz... # busy: stay in wait-read - comb += self.imem.a_valid_i.eq(1) - comb += self.imem.f_valid_i.eq(1) + comb += self.imem.a_i_valid.eq(1) + comb += self.imem.f_i_valid.eq(1) with m.Else(): # not busy: instruction fetched insn = get_insn(self.imem.f_instr_o, cur_state.pc) @@ -354,8 +354,8 @@ class TestIssuerInternal(Elaboratable): with m.Else(): # fetch the rest of the instruction from memory comb += self.imem.a_pc_i.eq(cur_state.pc + 4) - comb += self.imem.a_valid_i.eq(1) - comb += self.imem.f_valid_i.eq(1) + comb += self.imem.a_i_valid.eq(1) + comb += self.imem.f_i_valid.eq(1) m.next = "INSN_READ2" else: # not SVP64 - 32-bit only @@ -366,8 +366,8 @@ class TestIssuerInternal(Elaboratable): with m.State("INSN_READ2"): with m.If(self.imem.f_busy_o): # zzz... # busy: stay in wait-read - comb += self.imem.a_valid_i.eq(1) - comb += self.imem.f_valid_i.eq(1) + comb += self.imem.a_i_valid.eq(1) + comb += self.imem.f_i_valid.eq(1) with m.Else(): # not busy: instruction fetched insn = get_insn(self.imem.f_instr_o, cur_state.pc+4) @@ -391,13 +391,13 @@ class TestIssuerInternal(Elaboratable): with m.State("INSN_READY"): # hand over the instruction, to be decoded - comb += fetch_insn_valid_o.eq(1) - with m.If(fetch_insn_ready_i): + comb += fetch_insn_o_valid.eq(1) + with m.If(fetch_insn_i_ready): m.next = "IDLE" def fetch_predicate_fsm(self, m, - pred_insn_valid_i, pred_insn_ready_o, - pred_mask_valid_o, pred_mask_ready_i): + pred_insn_i_valid, pred_insn_o_ready, + pred_mask_o_valid, pred_mask_i_ready): """fetch_predicate_fsm - obtains (constructs in the case of CR) src/dest predicate masks @@ -439,8 +439,8 @@ class TestIssuerInternal(Elaboratable): with m.FSM(name="fetch_predicate"): with m.State("FETCH_PRED_IDLE"): - comb += pred_insn_ready_o.eq(1) - with m.If(pred_insn_valid_i): + comb += pred_insn_o_ready.eq(1) + with m.If(pred_insn_i_valid): with m.If(predmode == SVP64PredMode.INT): # skip fetching destination mask register, when zero with m.If(dall1s): @@ -558,18 +558,18 @@ class TestIssuerInternal(Elaboratable): m.next = "FETCH_PRED_DONE" with m.State("FETCH_PRED_DONE"): - comb += pred_mask_valid_o.eq(1) - with m.If(pred_mask_ready_i): + comb += pred_mask_o_valid.eq(1) + with m.If(pred_mask_i_ready): m.next = "FETCH_PRED_IDLE" def issue_fsm(self, m, core, pc_changed, sv_changed, nia, dbg, core_rst, is_svp64_mode, - fetch_pc_ready_o, fetch_pc_valid_i, - fetch_insn_valid_o, fetch_insn_ready_i, - pred_insn_valid_i, pred_insn_ready_o, - pred_mask_valid_o, pred_mask_ready_i, - exec_insn_valid_i, exec_insn_ready_o, - exec_pc_valid_o, exec_pc_ready_i): + fetch_pc_o_ready, fetch_pc_i_valid, + 
fetch_insn_o_valid, fetch_insn_i_ready, + pred_insn_i_valid, pred_insn_o_ready, + pred_mask_o_valid, pred_mask_i_ready, + exec_insn_i_valid, exec_insn_o_ready, + exec_pc_o_valid, exec_pc_i_ready): """issue FSM decode / issue FSM. this interacts with the "fetch" FSM @@ -620,8 +620,8 @@ class TestIssuerInternal(Elaboratable): # wait on "core stop" release, before next fetch # need to do this here, in case we are in a VL==0 loop with m.If(~dbg.core_stop_o & ~core_rst): - comb += fetch_pc_valid_i.eq(1) # tell fetch to start - with m.If(fetch_pc_ready_o): # fetch acknowledged us + comb += fetch_pc_i_valid.eq(1) # tell fetch to start + with m.If(fetch_pc_o_ready): # fetch acknowledged us m.next = "INSN_WAIT" with m.Else(): # tell core it's stopped, and acknowledge debug handshake @@ -638,8 +638,8 @@ class TestIssuerInternal(Elaboratable): # wait for an instruction to arrive from Fetch with m.State("INSN_WAIT"): - comb += fetch_insn_ready_i.eq(1) - with m.If(fetch_insn_valid_o): + comb += fetch_insn_i_ready.eq(1) + with m.If(fetch_insn_o_valid): # loop into ISSUE_START if it's a SVP64 instruction # and VL == 0. this because VL==0 is a for-loop # from 0 to 0 i.e. always, always a NOP. @@ -659,13 +659,13 @@ class TestIssuerInternal(Elaboratable): m.next = "DECODE_SV" # skip predication with m.State("PRED_START"): - comb += pred_insn_valid_i.eq(1) # tell fetch_pred to start - with m.If(pred_insn_ready_o): # fetch_pred acknowledged us + comb += pred_insn_i_valid.eq(1) # tell fetch_pred to start + with m.If(pred_insn_o_ready): # fetch_pred acknowledged us m.next = "MASK_WAIT" with m.State("MASK_WAIT"): - comb += pred_mask_ready_i.eq(1) # ready to receive the masks - with m.If(pred_mask_valid_o): # predication masks are ready + comb += pred_mask_i_ready.eq(1) # ready to receive the masks + with m.If(pred_mask_o_valid): # predication masks are ready m.next = "PRED_SKIP" # skip zeros in predicate @@ -758,17 +758,17 @@ class TestIssuerInternal(Elaboratable): # handshake with execution FSM, move to "wait" once acknowledged with m.State("INSN_EXECUTE"): - comb += exec_insn_valid_i.eq(1) # trigger execute - with m.If(exec_insn_ready_o): # execute acknowledged us + comb += exec_insn_i_valid.eq(1) # trigger execute + with m.If(exec_insn_o_ready): # execute acknowledged us m.next = "EXECUTE_WAIT" with m.State("EXECUTE_WAIT"): # wait on "core stop" release, at instruction end # need to do this here, in case we are in a VL>1 loop with m.If(~dbg.core_stop_o & ~core_rst): - comb += exec_pc_ready_i.eq(1) + comb += exec_pc_i_ready.eq(1) # see https://bugs.libre-soc.org/show_bug.cgi?id=636 - #with m.If(exec_pc_valid_o & exc_happened): + #with m.If(exec_pc_o_valid & exc_happened): # probably something like this: # sync += pdecode2.ldst_exc.eq(core.fus.get_exc("ldst0") # TODO: the exception info needs to be blatted @@ -780,8 +780,8 @@ class TestIssuerInternal(Elaboratable): # PC to the exception address, as well as alter MSR. # nothing else needs to be done other than to note # the change of PC and MSR (and, later, SVSTATE) - #with m.Elif(exec_pc_valid_o): - with m.If(exec_pc_valid_o): # replace with Elif (above) + #with m.Elif(exec_pc_o_valid): + with m.If(exec_pc_o_valid): # replace with Elif (above) # was this the last loop iteration? 
is_last = Signal() @@ -844,8 +844,8 @@ class TestIssuerInternal(Elaboratable): sync += cur_state.svstate.eq(new_svstate) # for next clock def execute_fsm(self, m, core, pc_changed, sv_changed, - exec_insn_valid_i, exec_insn_ready_o, - exec_pc_valid_o, exec_pc_ready_i): + exec_insn_i_valid, exec_insn_o_ready, + exec_pc_o_valid, exec_pc_i_ready): """execute FSM execute FSM. this interacts with the "issue" FSM @@ -860,7 +860,7 @@ class TestIssuerInternal(Elaboratable): # temporaries core_busy_o = core.busy_o # core is busy - core_ivalid_i = core.ivalid_i # instruction is valid + core_ii_valid = core.ii_valid # instruction is valid core_issue_i = core.issue_i # instruction is issued insn_type = core.e.do.insn_type # instruction MicroOp type @@ -868,9 +868,9 @@ class TestIssuerInternal(Elaboratable): # waiting for instruction bus (stays there until not busy) with m.State("INSN_START"): - comb += exec_insn_ready_o.eq(1) - with m.If(exec_insn_valid_i): - comb += core_ivalid_i.eq(1) # instruction is valid + comb += exec_insn_o_ready.eq(1) + with m.If(exec_insn_i_valid): + comb += core_ii_valid.eq(1) # instruction is valid comb += core_issue_i.eq(1) # and issued sync += sv_changed.eq(0) sync += pc_changed.eq(0) @@ -879,15 +879,15 @@ class TestIssuerInternal(Elaboratable): # instruction started: must wait till it finishes with m.State("INSN_ACTIVE"): with m.If(insn_type != MicrOp.OP_NOP): - comb += core_ivalid_i.eq(1) # instruction is valid + comb += core_ii_valid.eq(1) # instruction is valid # note changes to PC and SVSTATE with m.If(self.state_nia.wen & (1<
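To summarise the convention this rename adopts throughout: the four handshake rules quoted in the alu_fsm.py docstring above, restated with the new signal names. Below is a minimal sketch of a conforming two-state FSM; the PassThru class, its flattened port names and the 8-bit data width are illustrative only and stand in for the p./n. record ports used in the real code:

    from nmigen import Elaboratable, Module, Signal

    class PassThru(Elaboratable):
        """minimal sketch of the i_valid/o_ready/o_valid/i_ready rules"""
        def __init__(self):
            # prev ("p") port: inputs named i_*, outputs named o_*
            self.p_i_valid = Signal()
            self.p_o_ready = Signal()
            self.p_data_i = Signal(8)
            # next ("n") port
            self.n_o_valid = Signal()
            self.n_i_ready = Signal()
            self.n_data_o = Signal(8)

        def elaborate(self, platform):
            m = Module()
            with m.FSM():
                with m.State("IDLE"):
                    # rule 1: p.o_ready is asserted only in the initial state
                    m.d.comb += self.p_o_ready.eq(1)
                    # rule 3: stay in IDLE until p.i_valid, then accept input
                    with m.If(self.p_i_valid):
                        m.d.sync += self.n_data_o.eq(self.p_data_i)
                        m.next = "DONE"
                with m.State("DONE"):
                    # rule 2: n.o_valid is asserted only in the final state
                    m.d.comb += self.n_o_valid.eq(1)
                    # rule 4: hold the result until n.i_ready, then release
                    with m.If(self.n_i_ready):
                        m.next = "IDLE"
            return m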