From ad6a3e08867098930c8965acf8c03c500c2cc1be Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 4 Nov 2021 19:12:30 +0000 Subject: [PATCH] bugfix new ReservationStations2 it is awful - horribly inefficient - but it "works" lots of delays. but, it passes i.e. does not blow up or cause hanging --- src/nmutil/concurrentunit.py | 46 ++--- src/nmutil/test/test_reservation_stations.py | 179 +++++++++++++++++++ 2 files changed, 202 insertions(+), 23 deletions(-) create mode 100644 src/nmutil/test/test_reservation_stations.py diff --git a/src/nmutil/concurrentunit.py b/src/nmutil/concurrentunit.py index 433ec10..b0faea7 100644 --- a/src/nmutil/concurrentunit.py +++ b/src/nmutil/concurrentunit.py @@ -18,15 +18,16 @@ """ from math import log -from nmigen import Module, Elaboratable, Signal +from nmigen import Module, Elaboratable, Signal, Cat from nmigen.asserts import Assert +from nmigen.lib.coding import PriorityEncoder from nmigen.cli import main, verilog from nmutil.singlepipe import PassThroughStage from nmutil.multipipe import CombMuxOutPipe from nmutil.multipipe import PriorityCombMuxInPipe -from nmutil.multipipe import InputPriorityArbiter from nmutil.iocontrol import NextControl, PrevControl +from nmutil import nmoperator def num_bits(n): @@ -193,16 +194,15 @@ class ReservationStations2(Elaboratable): self.n = [] self.alu = alu # create prev and next ready/valid and add replica of ALU data specs - for i in range(p_len): + for i in range(num_rows): suffix = "_%d" % i - p = PrevControl(maskwid=i_wid, name=suffix) - n = NextControl(maskwid=i_wid, name=suffix) + p = PrevControl(name=suffix) + n = NextControl(name=suffix) p.i_data, n.o_data = self.alu.new_specs("rs_%d" % i) self.p.append(p) self.n.append(n) self.pipe = self # for Arbiter to select the incoming prevcontrols - self.p_mux = InputPriorityArbiter(self, num_rows) # set up pseudo-alus that look like a standard pipeline self.pseudoalus = [] @@ -223,6 +223,9 @@ class ReservationStations2(Elaboratable): pe = PriorityEncoder(self.num_rows) # input priority picker m.submodules.alu = self.alu m.submodules.selector = pe + for i, (p, n) in enumerate(zip(self.p, self.n)): + m.submodules["rs_p_%d" % i] = p + m.submodules["rs_n_%d" % i] = n # Priority picker for one RS self.active = Signal() @@ -235,7 +238,7 @@ class ReservationStations2(Elaboratable): # pick first non-reserved ReservationStation with data not already # sent into the ALU - m.d.comb += pe.i.eq(~rsvd & ~sent) + m.d.comb += pe.i.eq(rsvd & ~sent) m.d.comb += self.active.eq(~pe.n) # encoder active (one input valid) m.d.comb += self.m_id.eq(pe.o) # output one active input @@ -256,48 +259,45 @@ class ReservationStations2(Elaboratable): # input side ##### - # check if ALU is ready - n_i_ready = Signal(reset_less=True, name="n_i_rdy_data") - m.d.comb += alu_p_i_valid.eq(self.alu.p.i_valid_test) - # first, establish input: select one input to pass data to (p_mux) for i in range(self.num_rows): - i_buf = _spec(self.alu.stage.ispec, "i_buf%d" % i) # input buffer - o_buf = _spec(self.alu.stage.ospec, "o_buf%d" % i) # output buffer + i_buf, o_buf = self.alu.new_specs("buf%d" % i) # buffers with m.FSM(): # indicate ready to accept data, and accept it if incoming with m.State("ACCEPTING%d" % i): m.d.comb += self.p[i].o_ready.eq(1) # ready indicator + m.d.sync += self.n[i].o_valid.eq(0) # invalidate output with m.If(self.p[i].i_valid): # valid data incoming m.d.sync += rsvd[i].eq(1) # now reserved m.d.sync += nmoperator.eq(i_buf, self.p[i].i_data) - m.next = "ACCEPTED%d" % i) # move to "accepted" + m.next = "ACCEPTED%d" % i # move to "accepted" # now try to deliver to the ALU, but only if we are "picked" with m.State("ACCEPTED%d" % i): - with m.If(mid == i): # priority picker selected us + with m.If(mid == i): # picker selected us with m.If(self.alu.p.o_ready): # ALU can accept m.d.comb += self.alu.p.i_valid.eq(1) # transfer m.d.comb += nmoperator.eq(self.alu.p.i_data, i_buf) m.d.sync += sent[i].eq(1) # now reserved - m.next = "WAITOUT%d" % i) # move to "wait output" + m.next = "WAITOUT%d" % i # move to "wait output" # waiting for output to appear on the ALU, take a copy with m.State("WAITOUT%d" % i): with m.If(o_muxid == i): # when ALU output matches our RS - with m.If(self.n.alu.o_valid): # ALU can accept + with m.If(self.alu.n.o_valid): # ALU can accept m.d.sync += nmoperator.eq(o_buf, self.alu.n.o_data) m.d.sync += wait[i].eq(1) # now waiting - m.next = "SENDON%d" % i) # move to "send data on" + m.next = "SENDON%d" % i # move to "send data on" # waiting for "valid" indicator on RS output: deliver it with m.State("SENDON%d" % i): with m.If(self.n[i].i_ready): # user is ready to receive - m.d.comb += self.n[i].o_valid.eq(1) # indicate valid + # XXX this should be combinatorial not sync + #m.d.sync += self.n[i].o_valid.eq(1) # indicate valid + #m.d.sync += nmoperator.eq(self.n[i].o_data, o_buf) + m.d.sync += self.n[i].o_valid.eq(1) # indicate valid + m.d.sync += nmoperator.eq(self.n[i].o_data, o_buf) m.d.sync += wait[i].eq(0) # clear waiting m.d.sync += sent[i].eq(0) # and sending m.d.sync += rsvd[i].eq(0) # and reserved - m.next = "ACCEPTING%d" % i) # and back to "accepting" + m.next = "ACCEPTING%d" % i # and back to "accepting" return m - def ports(self): - return self._ports - diff --git a/src/nmutil/test/test_reservation_stations.py b/src/nmutil/test/test_reservation_stations.py new file mode 100644 index 0000000..62d2698 --- /dev/null +++ b/src/nmutil/test/test_reservation_stations.py @@ -0,0 +1,179 @@ +""" key strategic example showing how to do multi-input fan-in into a + multi-stage pipeline, then multi-output fanout. + + the multiplex ID from the fan-in is passed in to the pipeline, preserved, + and used as a routing ID on the fanout. +""" + +from random import randint +from math import log +from nmigen import Module, Signal, Cat, Value, Elaboratable +from nmigen.compat.sim import run_simulation +from nmigen.cli import verilog, rtlil + +from nmutil.concurrentunit import ReservationStations2 +from nmutil.singlepipe import SimpleHandshake, RecordObject, Object + + +class PassData2(RecordObject): + def __init__(self): + RecordObject.__init__(self) + self.muxid = Signal(2, reset_less=True) + self.idx = Signal(8, reset_less=True) + self.data = Signal(16, reset_less=True) + + +class PassData(Object): + def __init__(self, name=None): + Object.__init__(self) + if name is None: + name = "" + self.muxid = Signal(2, name="muxid"+name, reset_less=True) + self.idx = Signal(8, name="idx"+name, reset_less=True) + self.data = Signal(16, name="data"+name, reset_less=True) + + + +class PassThroughStage: + def ispec(self, name=None): + return PassData(name=name) + def ospec(self, name=None): + return self.ispec(name) # same as ospec + + def process(self, i): + return i # pass-through + + + +class PassThroughPipe(SimpleHandshake): + def __init__(self): + SimpleHandshake.__init__(self, PassThroughStage()) + + +class InputTest: + def __init__(self, dut): + self.dut = dut + self.di = {} + self.do = {} + self.tlen = 100 + for muxid in range(dut.num_rows): + self.di[muxid] = {} + self.do[muxid] = {} + for i in range(self.tlen): + self.di[muxid][i] = randint(0, 255) + (muxid<<8) + self.do[muxid][i] = self.di[muxid][i] + + def send(self, muxid): + for i in range(self.tlen): + op2 = self.di[muxid][i] + rs = self.dut.p[muxid] + yield rs.i_valid.eq(1) + yield rs.i_data.data.eq(op2) + yield rs.i_data.idx.eq(i) + yield rs.i_data.muxid.eq(muxid) + yield + o_p_ready = yield rs.o_ready + while not o_p_ready: + yield + o_p_ready = yield rs.o_ready + + print ("send", muxid, i, hex(op2)) + + yield rs.i_valid.eq(0) + # wait random period of time before queueing another value + for i in range(randint(0, 3)): + yield + + yield rs.i_valid.eq(0) + yield + + print ("send ended", muxid) + + ## wait random period of time before queueing another value + #for i in range(randint(0, 3)): + # yield + + #send_range = randint(0, 3) + #if send_range == 0: + # send = True + #else: + # send = randint(0, send_range) != 0 + + def rcv(self, muxid): + while True: + #stall_range = randint(0, 3) + #for j in range(randint(1,10)): + # stall = randint(0, stall_range) != 0 + # yield self.dut.n[0].i_ready.eq(stall) + # yield + n = self.dut.n[muxid] + yield n.i_ready.eq(1) + yield + o_n_valid = yield n.o_valid + i_n_ready = yield n.i_ready + if not o_n_valid or not i_n_ready: + continue + + out_muxid = yield n.o_data.muxid + out_i = yield n.o_data.idx + out_v = yield n.o_data.data + + print ("recv", out_muxid, out_i, hex(out_v)) + + # see if this output has occurred already, delete it if it has + assert muxid == out_muxid, \ + "out_muxid %d not correct %d" % (out_muxid, muxid) + assert out_i in self.do[muxid], "out_i %d not in array %s" % \ + (out_i, repr(self.do[muxid])) + assert self.do[muxid][out_i] == out_v # pass-through data + del self.do[muxid][out_i] + + # check if there's any more outputs + if len(self.do[muxid]) == 0: + break + print ("recv ended", muxid) + + +class TestALU(Elaboratable): + def __init__(self): + self.pipe1 = PassThroughPipe() # stage 1 (clock-sync) + self.pipe2 = PassThroughPipe() # stage 2 (clock-sync) + + self.p = self.pipe1.p + self.n = self.pipe2.n + self._ports = self.pipe1.ports() + self.pipe2.ports() + + def elaborate(self, platform): + m = Module() + m.submodules.pipe1 = self.pipe1 + m.submodules.pipe2 = self.pipe2 + + m.d.comb += self.pipe1.connect_to_next(self.pipe2) + + return m + + def new_specs(self, name): + return self.pipe1.ispec(name), self.pipe2.ospec(name) + + def ports(self): + return self._ports + + +def test1(): + alu = TestALU() + dut = ReservationStations2(alu, num_rows=4) + vl = rtlil.convert(dut, ports=dut.ports()) + with open("test_reservation_stations.il", "w") as f: + f.write(vl) + #run_simulation(dut, testbench(dut), vcd_name="test_inputgroup.vcd") + + test = InputTest(dut) + run_simulation(dut, [test.rcv(1), test.rcv(0), + test.rcv(3), test.rcv(2), + test.send(0), test.send(1), + test.send(3), test.send(2), + ], + vcd_name="test_reservation_stations.vcd") + +if __name__ == '__main__': + test1() -- 2.30.2