From c7c3c4df33d542ee62d9c4191fedc2886070d304 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 22 Jun 2020 11:49:41 +0100 Subject: [PATCH] move BE/LE byte-reverse into LDSTCompUnit --- src/soc/experiment/compldst_multi.py | 28 +++++++++++++++++++--- src/soc/experiment/l0_cache.py | 35 ++++------------------------ 2 files changed, 30 insertions(+), 33 deletions(-) diff --git a/src/soc/experiment/compldst_multi.py b/src/soc/experiment/compldst_multi.py index 21c3e314..3be7e193 100644 --- a/src/soc/experiment/compldst_multi.py +++ b/src/soc/experiment/compldst_multi.py @@ -84,6 +84,7 @@ from nmigen import Module, Signal, Mux, Cat, Elaboratable, Array, Repl from nmigen.hdl.rec import Record, Layout from nmutil.latch import SRLatch, latchregister +from nmutil.byterev import byte_reverse from soc.experiment.compalu_multi import go_record, CompUnitRecord from soc.experiment.l0_cache import PortInterface @@ -162,6 +163,11 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable): in a single cycle and the CompUnit set back to doing another op. This means deasserting go_st_i, go_ad_i or go_wr_i as appropriate depending on whether the operation is a ST or LD. + + Note: LDSTCompUnit takes care of LE/BE normalisation: + * LD data is normalised after receipt from the PortInterface + * ST data is normalised *prior* to sending onto the PortInterface + TODO: use one module for the byte-reverse as it's quite expensive in gates """ def __init__(self, pi=None, rwid=64, awid=48, opsubset=CompLDSTOpSubset, @@ -464,14 +470,30 @@ class LDSTCompUnit(RegSpecAPI, Elaboratable): comb += pi.op.eq(self.oper_i) # op details (not all needed) # address comb += pi.addr.data.eq(addr_r) # EA from adder - comb += pi.addr.ok.eq(alu_ok & (lod_l.q | sto_l.q)) # "go do address stuff" + comb += pi.addr.ok.eq(alu_ok & (lod_l.q | sto_l.q)) # "do address stuff" comb += self.addr_exc_o.eq(pi.addr_exc_o) # exception occurred comb += addr_ok.eq(self.pi.addr_ok_o) # no exc, address fine + + # byte-reverse on LD - yes this is inverted (data is already BE) + with m.If(self.oper_i.byte_reverse): + comb += ldd_o.eq(pi.ld.data) # put data out, straight (as BE) + with m.Else(): + # byte-reverse the data based on ld/st width (turn it to LE) + data_len = self.oper_i.data_len + lddata_r = byte_reverse(m, 'lddata_r', pi.ld.data, data_len) + comb += ldd_o.eq(lddata_r) # put reversed- data out # ld - ld gets latched in via lod_l - comb += ldd_o.eq(pi.ld.data) # ld data goes into ld reg (above) comb += ld_ok.eq(pi.ld.ok) # ld.ok *closes* (freezes) ld data + + # yes this also looks odd (inverted) + with m.If(self.oper_i.byte_reverse): + comb += pi.st.data.eq(srl[2]) # 3rd operand latch + with m.Else(): + # byte-reverse the data based on width + data_len = self.oper_i.data_len + stdata_r = byte_reverse(m, 'stdata_r', srl[2], data_len) + comb += pi.st.data.eq(stdata_r) # store - data goes in based on go_st - comb += pi.st.data.eq(srl[2]) # 3rd operand latch comb += pi.st.ok.eq(self.st.go) # go store signals st data valid return m diff --git a/src/soc/experiment/l0_cache.py b/src/soc/experiment/l0_cache.py index 286b4e40..19d4e4e9 100644 --- a/src/soc/experiment/l0_cache.py +++ b/src/soc/experiment/l0_cache.py @@ -302,19 +302,6 @@ class LDSTPort(Elaboratable): def ports(self): return list(self) -# TODO: turn this into a module -def byte_reverse(m, name, data, length): - comb = m.d.comb - data_r = Signal.like(data, name=name) - with m.Switch(length): - for j in [1,2,4,8]: - with m.Case(j): - for i in range(j): - dest = data_r.word_select(i, 8) - src = data.word_select(j-1-i, 8) - comb += dest.eq(src) - return data_r - class L0CacheBuffer(Elaboratable): """L0 Cache / Buffer @@ -330,6 +317,9 @@ class L0CacheBuffer(Elaboratable): a "demo" / "test" class, and one important aspect: it responds combinatorially, where a nmigen FSM's state-changes only activate on clock-sync boundaries. + + Note: the data byte-order is *not* expected to be normalised (LE/BE) + by this class. That task is taken care of by LDSTCompUnit. """ def __init__(self, n_units, mem, regwid=64, addrwid=48): @@ -458,13 +448,7 @@ class L0CacheBuffer(Elaboratable): # and also handle the ready/stall/busy protocol comb += lddata.eq((rdport.data & lenexp.rexp_o) >> (lenexp.addr_i*8)) - # yes this looks odd (inverted) - with m.If(ldport.op.byte_reverse): - comb += ldport.ld.data.eq(lddata) # put data out - with m.Else(): - # byte-reverse the data based on ld/st width - lddata_r = byte_reverse(m, 'lddata_r', lddata, lenexp.len_i) - comb += ldport.ld.data.eq(lddata_r) # put reversed- data out + comb += ldport.ld.data.eq(lddata) # put data out comb += ldport.ld.ok.eq(1) # indicate data valid comb += reset_l.s.eq(1) # reset mode after 1 cycle @@ -472,17 +456,8 @@ class L0CacheBuffer(Elaboratable): with m.If(st_active.q & stport.st.ok): # shift data up before storing. lenexp *bit* version of mask is # passed straight through as byte-level "write-enable" lines. - stport_d = stport.st.data - stdata_i = Signal(self.regwid, reset_less=True) stdata = Signal(self.regwid, reset_less=True) - # yes this looks odd (inverted) - with m.If(ldport.op.byte_reverse): - comb += stdata_i.eq(stport_d) - with m.Else(): - # byte-reverse the data based on width - stdata_r = byte_reverse(m, 'stdata_r', stport_d, lenexp.len_i) - comb += stdata_i.eq(stdata_r) - comb += stdata.eq(stdata_i << (lenexp.addr_i*8)) + comb += stdata.eq(stport.st.data << (lenexp.addr_i*8)) # TODO: replace with link to LoadStoreUnitInterface.x_store_data # and also handle the ready/stall/busy protocol comb += wrport.data.eq(stdata) # write st to mem -- 2.30.2