from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
from nmutil.pipemodbase import PipeModBase
-from soc.logical.pipe_data import ALUInputData
-from soc.alu.pipe_data import ALUOutputData
-from ieee754.part.partsig import PartitionedSignal
+from soc.branch.pipe_data import BranchInputData, BranchOutputData
from soc.decoder.power_enums import InternalOp
-from soc.countzero.countzero import ZeroCounter
from soc.decoder.power_fields import DecodeFields
from soc.decoder.power_fieldsn import SignalBitRange
return res
-class LogicalMainStage(PipeModBase):
+class BranchMainStage(PipeModBase): # "main" stage of the branch pipeline (combinatorial only)
def __init__(self, pspec):
super().__init__(pspec, "main")
self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
self.fields.create_specs()
def ispec(self):
- return ALUInputData(self.pspec)
+ return BranchInputData(self.pspec) # input record type for this stage
def ospec(self):
- return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
+ return BranchOutputData(self.pspec) # TODO: ALUIntermediateData (NOTE(review): carried over from the logical stage - confirm it still applies to branch)
def elaborate(self, platform):
m = Module()
comb = m.d.comb
- op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o
+ op = self.i.ctx.op
##########################
# main switch on the decoded operation type; no branch cases are implemented yet
with m.Switch(op.insn_type):
+ pass
- ###### AND, OR, XOR #######
- with m.Case(InternalOp.OP_AND):
- comb += o.eq(a & b)
- with m.Case(InternalOp.OP_OR):
- comb += o.eq(a | b)
- with m.Case(InternalOp.OP_XOR):
- comb += o.eq(a ^ b)
-
- ###### cmpb #######
- with m.Case(InternalOp.OP_CMPB):
- l = []
- for i in range(8):
- slc = slice(i*8, (i+1)*8)
- l.append(Repl(a[slc] == b[slc], 8))
- comb += o.eq(Cat(*l))
-
- ###### popcount #######
- with m.Case(InternalOp.OP_POPCNT):
- # starting from a, perform successive addition-reductions
- # creating arrays big enough to store the sum, each time
- pc = [a]
- # QTY32 2-bit (to take 2x 1-bit sums) etc.
- work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
- for l, b in work:
- pc.append(array_of(l, b))
- pc8 = pc[3] # array of 8 8-bit counts (popcntb)
- pc32 = pc[5] # array of 2 32-bit counts (popcntw)
- popcnt = pc[-1] # array of 1 64-bit count (popcntd)
- # cascade-tree of adds
- for idx, (l, b) in enumerate(work):
- for i in range(l):
- stt, end = i*2, i*2+1
- src, dst = pc[idx], pc[idx+1]
- comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
- Cat(src[end], Const(0, 1)))
- # decode operation length
- with m.If(op.data_len[2:4] == 0b00):
- # popcntb - pack 8x 4-bit answers into output
- for i in range(8):
- comb += o[i*8:i*8+4].eq(pc8[i])
- with m.Elif(op.data_len[3] == 0):
- # popcntw - pack 2x 5-bit answers into output
- for i in range(2):
- comb += o[i*32:i*32+5].eq(pc32[i])
- with m.Else():
- # popcntd - put 1x 6-bit answer into output
- comb += o.eq(popcnt[0])
-
- ###### parity #######
- with m.Case(InternalOp.OP_PRTY):
- # strange instruction which XORs together the LSBs of each byte
- par0 = Signal(reset_less=True)
- par1 = Signal(reset_less=True)
- comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor())
- comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor())
- with m.If(op.data_len[3] == 1):
- comb += o.eq(par0 ^ par1)
- with m.Else():
- comb += o[0].eq(par0)
- comb += o[32].eq(par1)
-
- ###### cntlz #######
- with m.Case(InternalOp.OP_CNTZ):
- x_fields = self.fields.instrs['X']
- XO = Signal(x_fields['XO'][0:-1].shape())
- m.submodules.countz = countz = ZeroCounter()
- comb += countz.rs_i.eq(a)
- comb += countz.is_32bit_i.eq(op.is_32bit)
- comb += countz.count_right_i.eq(XO[-1])
- comb += o.eq(countz.result_o)
-
- ###### bpermd #######
- # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
###### context pass-through #####
- comb += self.o.so.eq(self.i.so)
comb += self.o.ctx.eq(self.i.ctx)
return m
from nmigen import Signal, Const
from ieee754.fpcommon.getop import FPPipeContext
+from soc.decoder.power_decoder2 import Data
class IntegerData:
return [self.ctx.eq(i.ctx)]
-class ALUInputData(IntegerData):
+class BranchInputData(IntegerData):
+ """Input record for the branch pipeline: ctx (from IntegerData) plus lr, spr and cr."""
def __init__(self, pspec):
super().__init__(pspec)
- self.a = Signal(64, reset_less=True) # RA
- self.b = Signal(64, reset_less=True) # RB/immediate
- self.so = Signal(reset_less=True)
- self.carry_in = Signal(reset_less=True)
+ # We need both lr and spr for bclr and bcctrl. Bclr can read
+ # from both ctr and lr, and bcctrl can write to both ctr and
+ # lr.
+ self.lr = Signal(64, reset_less=True)
+ self.spr = Signal(64, reset_less=True)
+ self.cr = Signal(32, reset_less=True)
+ # NIA not needed, it's already part of ctx
def __iter__(self):
yield from super().__iter__()
- yield self.a
- yield self.b
- yield self.carry_in
- yield self.so
+ yield self.lr
+ yield self.spr
+ yield self.cr
def eq(self, i):
lst = super().eq(i)
- return lst + [self.a.eq(i.a), self.b.eq(i.b),
- self.carry_in.eq(i.carry_in),
- self.so.eq(i.so)]
+ # every field is assigned from the same-named field of i
+ return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr),
+ self.cr.eq(i.cr)]
+
+
+class BranchOutputData(IntegerData): # output record of the branch pipeline: ctx plus lr, spr, nia_out
+ def __init__(self, pspec):
+ super().__init__(pspec)
+ self.lr = Signal(64, reset_less=True) # 64-bit lr value
+ self.spr = Signal(64, reset_less=True) # 64-bit spr value
+ self.nia_out = Data(64, name="nia_out") # Data from power_decoder2; presumably the next instruction address - confirm
+
+ def __iter__(self):
+ yield from super().__iter__() # base-class members first
+ yield self.lr
+ yield self.spr
+ yield from self.nia_out # nia_out is iterated, so its own members are yielded rather than the object itself
+
+ def eq(self, i):
+ lst = super().eq(i)
+ return lst + [self.lr.eq(i.lr), self.spr.eq(i.spr), # each field copied from the same-named field of i
+ self.nia_out.eq(i.nia_out)]
from nmutil.singlepipe import ControlBase
from nmutil.pipemodbase import PipeModBaseChain
-from soc.alu.input_stage import ALUInputStage
-from soc.logical.main_stage import LogicalMainStage
-from soc.alu.output_stage import ALUOutputStage
+from soc.branch.main_stage import BranchMainStage
-class LogicalStages(PipeModBaseChain):
+class BranchStages(PipeModBaseChain): # stage chain used by the branch pipeline
def get_chain(self):
- inp = ALUInputStage(self.pspec)
- main = LogicalMainStage(self.pspec)
- out = ALUOutputStage(self.pspec)
- return [inp, main, out]
+ main = BranchMainStage(self.pspec)
+ return [main] # single-stage chain: the ALU input/output stages of the logical pipeline are not used here
-class LogicalBasePipe(ControlBase):
+class BranchBasePipe(ControlBase):
def __init__(self, pspec):
ControlBase.__init__(self)
- self.pipe1 = LogicalStages(pspec)
+ self.pipe1 = BranchStages(pspec)
self._eqs = self.connect([self.pipe1])
def elaborate(self, platform):
from soc.decoder.isa.all import ISA
-from soc.logical.pipeline import LogicalBasePipe
+from soc.branch.pipeline import BranchBasePipe
from soc.alu.alu_input_record import CompALUOpSubset
from soc.alu.pipe_data import ALUPipeSpec
import random
recwidth += width
return recwidth
-def set_alu_inputs(alu, dec2, sim):
- # TODO: see https://bugs.libre-soc.org/show_bug.cgi?id=305#c43
- # detect the immediate here (with m.If(self.i.ctx.op.imm_data.imm_ok))
- # and place it into data_i.b
-
- reg3_ok = yield dec2.e.read_reg3.ok
- reg1_ok = yield dec2.e.read_reg1.ok
- assert reg3_ok != reg1_ok
- if reg3_ok:
- data1 = yield dec2.e.read_reg3.data
- data1 = sim.gpr(data1).value
- elif reg1_ok:
- data1 = yield dec2.e.read_reg1.data
- data1 = sim.gpr(data1).value
- else:
- data1 = 0
-
- yield alu.p.data_i.a.eq(data1)
-
- # If there's an immediate, set the B operand to that
- reg2_ok = yield dec2.e.read_reg2.ok
- imm_ok = yield dec2.e.imm_data.imm_ok
- if imm_ok:
- data2 = yield dec2.e.imm_data.imm
- elif reg2_ok:
- data2 = yield dec2.e.read_reg2.data
- data2 = sim.gpr(data2).value
- else:
- data2 = 0
- yield alu.p.data_i.b.eq(data2)
-
-
-
-def set_extra_alu_inputs(alu, dec2, sim):
- carry = 1 if sim.spr['XER'][XER_bits['CA']] else 0
- yield alu.p.data_i.carry_in.eq(carry)
- so = 1 if sim.spr['XER'][XER_bits['SO']] else 0
- yield alu.p.data_i.so.eq(so)
-
# This test bench is a bit different than is usual. Initially when I
# was writing it, I had all of the tests call a function to create a
tc = TestCase(prog, initial_regs, initial_sprs, self.test_name)
test_data.append(tc)
- def test_rand(self):
- insns = ["and", "or", "xor"]
- for i in range(40):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1, 2"]
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- initial_regs[2] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_rand_imm_logical(self):
- insns = ["andi.", "andis.", "ori", "oris", "xori", "xoris"]
- for i in range(10):
- choice = random.choice(insns)
- imm = random.randint(0, (1<<16)-1)
- lst = [f"{choice} 3, 1, {imm}"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- @unittest.skip("broken")
- def test_cntz(self):
- insns = ["cntlzd", "cnttzd"]
- for i in range(10):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- def test_parity(self):
- insns = ["prtyw", "prtyd"]
- for i in range(10):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
- @unittest.skip("broken")
- def test_popcnt(self):
- insns = ["popcntb", "popcntw", "popcntd"]
- for i in range(10):
- choice = random.choice(insns)
- lst = [f"{choice} 3, 1"]
- print(lst)
- initial_regs = [0] * 32
- initial_regs[1] = random.randint(0, (1<<64)-1)
- self.run_tst_program(Program(lst), initial_regs)
-
def test_cmpb(self): # NOTE(review): name is left over from the logical-pipeline tests; the program below is a branch, not cmpb
- lst = ["cmpb 3, 1, 2"]
+ lst = ["b 0x1234"] # single branch instruction with a fixed target
initial_regs = [0] * 32
- initial_regs[1] = 0xdeadbeefcafec0de
- initial_regs[2] = 0xd0adb0000afec1de
self.run_tst_program(Program(lst), initial_regs)
def test_ilang(self):
rec = CompALUOpSubset()
pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- alu = LogicalBasePipe(pspec)
+ alu = BranchBasePipe(pspec)
vl = rtlil.convert(alu, ports=[])
- with open("logical_pipeline.il", "w") as f:
+ # dump the branch pipeline's RTLIL under its own name so it does not
+ # overwrite the file produced by the logical pipeline's test
+ with open("branch_pipeline.il", "w") as f:
f.write(vl)
rec = CompALUOpSubset()
pspec = ALUPipeSpec(id_wid=2, op_wid=get_rec_width(rec))
- m.submodules.alu = alu = LogicalBasePipe(pspec)
+ m.submodules.alu = alu = BranchBasePipe(pspec)
comb += alu.p.data_i.ctx.op.eq_from_execute1(pdecode2.e)
comb += alu.p.valid_i.eq(1)
yield instruction.eq(ins) # raw binary instr.
yield Settle()
fn_unit = yield pdecode2.e.fn_unit
- self.assertEqual(fn_unit, Function.LOGICAL.value, code)
- yield from set_alu_inputs(alu, pdecode2, simulator)
- yield from set_extra_alu_inputs(alu, pdecode2, simulator)
+ self.assertEqual(fn_unit, Function.BRANCH.value, code)
yield
opname = code.split(' ')[0]
yield from simulator.call(opname)
index = simulator.pc.CIA.value//4
- vld = yield alu.n.valid_o
- while not vld:
- yield
- vld = yield alu.n.valid_o
- yield
- alu_out = yield alu.n.data_o.o
- out_reg_valid = yield pdecode2.e.write_reg.ok
- if out_reg_valid:
- write_reg_idx = yield pdecode2.e.write_reg.data
- expected = simulator.gpr(write_reg_idx).value
- print(f"expected {expected:x}, actual: {alu_out:x}")
- self.assertEqual(expected, alu_out, code)
- yield from self.check_extra_alu_outputs(alu, pdecode2,
- simulator)
sim.add_sync_process(process)
with sim.write_vcd("simulator.vcd", "simulator.gtkw",
LDST = 2
SHIFT_ROT = 3
LOGICAL = 4
+ BRANCH = 5
@unique