X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fsoc%2Fscoreboard%2Faddr_split.py;h=3d197f2e9daffabccce04d9b136d0d86b01e4b90;hb=86a7f9d692d50e9d76d83fb4338e768e9892e09e;hp=d2087003e78781963bdc5d5a897de9e08e456b09;hpb=3cf1977148cda9acdd0d0bafd8f27de19b1207cb;p=soc.git diff --git a/src/soc/scoreboard/addr_split.py b/src/soc/scoreboard/addr_split.py index d2087003..3d197f2e 100644 --- a/src/soc/scoreboard/addr_split.py +++ b/src/soc/scoreboard/addr_split.py @@ -1,6 +1,14 @@ # LDST Address Splitter. For misaligned address crossing cache line boundary +""" +Links: +* https://libre-riscv.org/3d_gpu/architecture/6600scoreboard/ +* http://bugs.libre-riscv.org/show_bug.cgi?id=257 +* http://bugs.libre-riscv.org/show_bug.cgi?id=216 +""" -from nmigen import Elaboratable, Module, Signal, Record, Array, Const +#from soc.experiment.pimem import PortInterface + +from nmigen import Elaboratable, Module, Signal, Record, Array, Const, Cat from nmutil.latch import SRLatch, latchregister from nmigen.back.pysim import Simulator, Delay from nmigen.cli import verilog, rtlil @@ -8,6 +16,7 @@ from nmigen.cli import verilog, rtlil from soc.scoreboard.addr_match import LenExpand #from nmutil.queue import Queue + class LDData(Record): def __init__(self, dwidth, name=None): Record.__init__(self, (('err', 1), ('data', dwidth)), name=name) @@ -47,13 +56,32 @@ class LDLatch(Elaboratable): def ports(self): return list(self) +def byteExpand(signal): + if(type(signal)==int): + ret = 0 + shf = 0 + while(signal>0): + bit = signal & 1 + ret |= (0xFF * bit) << shf + signal = signal >> 1 + shf += 8 + return ret + lst = [] + for i in range(len(signal)): + bit = signal[i] + for j in range(8): #TODO this can be optimized + lst += [bit] + return Cat(*lst) + class LDSTSplitter(Elaboratable): - def __init__(self, dwidth, awidth, dlen): + def __init__(self, dwidth, awidth, dlen, pi=None): self.dwidth, self.awidth, self.dlen = dwidth, awidth, dlen - #cline_wid = 8<> ashift1) | - (ld2.ld_o.data << ashift2)) + comb += self.ld_data_o.data.eq((ld1.ld_o.data >> (ashift1*8)) | + (ld2.ld_o.data << (ashift2*8))) with m.If(self.is_st_i): + # set busy flag -- required for unit test for i, (ld, mask) in enumerate(((ld1, mask1), (ld2, mask2))): valid = Signal(name="stvalid_i%d" % i, reset_less=True) @@ -137,8 +185,8 @@ class LDSTSplitter(Elaboratable): comb += self.sld_valid_o[i].eq(ld.valid_o) comb += self.sst_data_o[i].data.eq(ld.ld_o.data) - comb += ld1.ld_i.eq((self.st_data_i << ashift1) & mask1) - comb += ld2.ld_i.eq((self.st_data_i >> ashift2) & mask2) + comb += ld1.ld_i.eq((self.st_data_i << (ashift1*8)) & mask1) + comb += ld2.ld_i.eq((self.st_data_i >> (ashift2*8)) & mask2) # sort out valid: mask2 zero we ignore 2nd LD with m.If(mask2 == mzero): @@ -169,60 +217,69 @@ class LDSTSplitter(Elaboratable): def ports(self): return list(self) + def sim(dut): sim = Simulator(dut) sim.add_clock(1e-6) - data = 0b11010011 - dlen = 4 # 4 bits - addr = 0b1100 + data = 0x0102030405060708A1A2A3A4A5A6A7A8 + dlen = 16 # data length in bytes + addr = 0b1110 ld_len = 8 - ldm = ((1<> (1<> (1 << dlen) + print("dmask1", bin(dmask1)) + dmask = dmask & ((1 << (1 << dlen))-1) + print("dmask", bin(dmask)) + dmask1 = byteExpand(dmask1) + dmask = byteExpand(dmask) + + def send_ld(): + print("send_ld") yield dut.is_ld_i.eq(1) yield dut.len_i.eq(ld_len) yield dut.addr_i.eq(addr) yield dut.valid_i.eq(1) - print ("waiting") + print("waiting") while True: valid_o = yield dut.valid_o if valid_o: break yield + exc = yield dut.exc ld_data_o = yield dut.ld_data_o.data yield dut.is_ld_i.eq(0) yield - print (bin(ld_data_o), bin(data)) + print(exc) + assert exc==0 + print(hex(ld_data_o), hex(data)) assert ld_data_o == data def lds(): - print ("lds") + print("lds") while True: valid_i = yield dut.valid_i if valid_i: break yield - shf = addr & dlm + shf = (addr & dlm)*8 #shift bytes not bits + print("shf",shf/8.0) shfdata = (data << shf) data1 = shfdata & dmask - print ("ld data1", bin(data), bin(data1), shf, bin(dmask)) + print("ld data1", hex(data), hex(data1), shf,shf/8.0, hex(dmask)) - data2 = (shfdata >> 16) & dmask1 - print ("ld data2", 1<> (1<> 128) & dmask1 + print("ld data2", 1 << dlen, hex(data >> (1 << dlen)), hex(data2)) yield dut.sld_data_i[0].data.eq(data1) yield dut.sld_valid_i[0].eq(1) yield @@ -231,7 +288,7 @@ def sim(dut): yield sim.add_sync_process(lds) - sim.add_sync_process(send_in) + sim.add_sync_process(send_ld) prefix = "ldst_splitter" with sim.write_vcd("%s.vcd" % prefix, traces=dut.ports()):