3d197f2e9daffabccce04d9b136d0d86b01e4b90
1 # LDST Address Splitter: handles misaligned accesses whose address range crosses a cache-line boundary
4 * https://libre-riscv.org/3d_gpu/architecture/6600scoreboard/
5 * http://bugs.libre-riscv.org/show_bug.cgi?id=257
6 * http://bugs.libre-riscv.org/show_bug.cgi?id=216
9 #from soc.experiment.pimem import PortInterface
11 from nmigen
import Elaboratable
, Module
, Signal
, Record
, Array
, Const
, Cat
12 from nmutil
.latch
import SRLatch
, latchregister
13 from nmigen
.back
.pysim
import Simulator
, Delay
14 from nmigen
.cli
import verilog
, rtlil
16 from soc
.scoreboard
.addr_match
import LenExpand
17 #from nmutil.queue import Queue
def __init__(self, dwidth, name=None):
    """Set up a two-field record: a 1-bit ``err`` flag and a ``data`` field.

    dwidth: width given to the ``data`` field.
    name:   optional record name, passed straight through to ``Record``.
    """
    # field order matters for the record's bit layout: err first, then data
    layout = (('err', 1), ('data', dwidth))
    Record.__init__(self, layout, name=name)
25 class LDLatch(Elaboratable
):
def __init__(self, dwidth, awidth, mlen):
    """Declare the latch ports.

    dwidth: width of the ``data`` field of the ld_i / ld_o records.
    awidth: width of the addr_i signal.
    mlen:   width of the mask_i signal.
    """
    # request side: address, mask and its valid strobe
    self.addr_i = Signal(awidth, reset_less=True)
    self.mask_i = Signal(mlen, reset_less=True)
    self.valid_i = Signal(reset_less=True)

    # data in / data out records (each carries err + data)
    self.ld_i = LDData(dwidth, "ld_i")
    self.ld_o = LDData(dwidth, "ld_o")

    # output-side valid
    self.valid_o = Signal(reset_less=True)
# Build the latch logic: an SRLatch submodule ("in_l") has its set input
# driven from valid_i, valid_o is in_l.q AND valid_i, and ld_i / ld_o are
# handed to latchregister() with (in_l.q & valid_o) as its control argument.
# NOTE(review): `m` and `comb` are used below but are not bound in any visible
# line of this method, and no `return m` is visible -- confirm both exist.
35 def elaborate(self
, platform
):
# The latch is created with sync=False.
# NOTE(review): only the set input (.s) is driven in the visible lines; no
# connection to a reset input is visible here -- confirm how it is cleared.
38 m
.submodules
.in_l
= in_l
= SRLatch(sync
=False, name
="in_l")
40 comb
+= in_l
.s
.eq(self
.valid_i
)
# valid_o is only asserted while valid_i is still held high
41 comb
+= self
.valid_o
.eq(in_l
.q
& self
.valid_i
)
# presumably latchregister() captures ld_i into ld_o while the control
# expression is high -- verify against nmutil.latch.latchregister
42 latchregister(m
, self
.ld_i
, self
.ld_o
, in_l
.q
& self
.valid_o
, "ld_i_r")
# Expand a mask: plain Python ints take a separate branch from everything
# else.  In the int branch each contribution is (0xFF * bit) shifted left by
# shf and OR-ed into ret; the other branch walks every index of `signal` and,
# for each, loops 8 times.
# NOTE(review): most of the body is not visible here; presumably every input
# bit is replicated into 8 output bits (one mask bit per byte -> one mask bit
# per data bit) -- confirm against the full function.
59 def byteExpand(signal
):
# NOTE(review): exact type() comparison rejects int subclasses;
# isinstance(signal, int) is the usual form -- confirm before changing.
60 if(type(signal
)==int):
65 ret |
= (0xFF * bit
) << shf
70 for i
in range(len(signal
)):
72 for j
in range(8): #TODO this can be optimized
76 class LDSTSplitter(Elaboratable
):
def __init__(self, dwidth, awidth, dlen, pi=None):
    """Declare the splitter's ports.

    dwidth: data width; multiplied by 8 below to obtain a bit width
            (the original comment says this converts bytes to bits).
    awidth: width of addr_i.
    dlen:   width of len_i.
    pi:     optional; not referenced anywhere in this constructor.
    """
    self.dwidth = dwidth
    self.awidth = awidth
    self.dlen = dlen
    # cline_wid = 8<<dlen # cache line width: bytes (8) times (2^^dlen)
    line_bits = dwidth * 8  # convert bytes to bits

    # incoming request: address, length and valid handshake
    self.addr_i = Signal(awidth, reset_less=True)
    # no match in PortInterface
    self.len_i = Signal(dlen, reset_less=True)
    self.valid_i = Signal(reset_less=True)
    self.valid_o = Signal(reset_less=True)

    # operation select
    self.is_ld_i = Signal(reset_less=True)
    self.is_st_i = Signal(reset_less=True)

    # full-width load result and store operand
    self.ld_data_o = LDData(dwidth * 8, "ld_data_o")  # port.ld
    self.st_data_i = LDData(dwidth * 8, "st_data_i")  # port.st

    self.exc = Signal(reset_less=True)  # pi.exc TODO
    # TODO : create/connect two outgoing port interfaces

    # split-load side: one valid bit and one data record per half
    self.sld_valid_o = Signal(2, reset_less=True)
    self.sld_valid_i = Signal(2, reset_less=True)
    self.sld_data_i = Array([
        LDData(line_bits, "ld_data_i1"),
        LDData(line_bits, "ld_data_i2"),
    ])

    # split-store side: one valid bit and one data record per half
    self.sst_valid_o = Signal(2, reset_less=True)
    self.sst_valid_i = Signal(2, reset_less=True)
    self.sst_data_o = Array([
        LDData(line_bits, "st_data_i1"),
        LDData(line_bits, "st_data_i2"),
    ])
# Build the splitter logic: two LDLatch instances (ld1, ld2) and a LenExpand
# are instantiated; the expanded length mask is divided into a low part
# (mask1) and a high part (mask2); ld1 is given the line address
# addr_i[dlen:] and ld2 that address plus one.  Under is_ld_i the two latch
# outputs are shifted and OR-ed into ld_data_o; under is_st_i st_data_i is
# shifted and masked into the two latches.
# NOTE(review): `m`, `comb`, `dlen` and `mlen` are used throughout but are not
# bound in any visible line of this method, and no `return m` is visible --
# confirm they are defined/returned in the lines not shown here.
108 def elaborate(self
, platform
):
113 mzero
= Const(0, mlen
)
114 m
.submodules
.ld1
= ld1
= LDLatch(self
.dwidth
*8, self
.awidth
-dlen
, mlen
)
115 m
.submodules
.ld2
= ld2
= LDLatch(self
.dwidth
*8, self
.awidth
-dlen
, mlen
)
116 m
.submodules
.lenexp
= lenexp
= LenExpand(self
.dlen
)
118 #comb += self.pi.addr_ok_o.eq(self.addr_i < 65536) #FIXME 64k limit
119 #comb += self.pi.busy_o.eq(busy)
122 # FIXME bytes not bits
123 # set up len-expander, len to mask. ld1 gets first bit, ld2 gets rest
124 comb
+= lenexp
.addr_i
.eq(self
.addr_i
)
125 comb
+= lenexp
.len_i
.eq(self
.len_i
)
126 mask1
= Signal(mlen
, reset_less
=True)
127 mask2
= Signal(mlen
, reset_less
=True)
128 comb
+= mask1
.eq(lenexp
.lexp_o
[0:mlen
]) # Lo bits of expanded len-mask
129 comb
+= mask2
.eq(lenexp
.lexp_o
[mlen
:]) # Hi bits of expanded len-mask
131 # set up new address records: addr1 is "as-is", addr2 is +1
132 comb
+= ld1
.addr_i
.eq(self
.addr_i
[dlen
:])
133 ld2_value
= self
.addr_i
[dlen
:] + 1
134 comb
+= ld2
.addr_i
.eq(ld2_value
)
# index (awidth-dlen) is the bit just above the sliced line address, i.e. the
# carry out of the +1 above; when it is set, exc is raised
136 with m
.If(ld2_value
[self
.awidth
-dlen
]):
137 comb
+= self
.exc
.eq(1)
139 # data needs recombining / splitting via shifting.
# ashift1 is the low dlen bits of the address (scaled by 8 where it is used)
140 ashift1
= Signal(self
.dlen
, reset_less
=True)
141 ashift2
= Signal(self
.dlen
, reset_less
=True)
142 comb
+= ashift1
.eq(self
.addr_i
[:self
.dlen
])
# NOTE(review): ashift2 is only dlen bits wide, so (1 << dlen) - ashift1
# truncates to 0 when ashift1 is 0 -- confirm the second half is meant to be
# used unshifted in that case.
143 comb
+= ashift2
.eq((1 << dlen
)-ashift1
)
# from here on mask1 / mask2 / mzero are rebound to their byteExpand() results
146 mask1
= byteExpand(mask1
)
147 mask2
= byteExpand(mask2
)
148 mzero
= byteExpand(mzero
)
150 with m
.If(self
.is_ld_i
):
151 # set up connections to LD-split. note: not active if mask is zero
152 for i
, (ld
, mask
) in enumerate(((ld1
, mask1
),
154 ld_valid
= Signal(name
="ldvalid_i%d" % i
, reset_less
=True)
155 comb
+= ld_valid
.eq(self
.valid_i
& self
.sld_valid_i
[i
])
156 comb
+= ld
.valid_i
.eq(ld_valid
& (mask
!= mzero
))
157 comb
+= ld
.ld_i
.eq(self
.sld_data_i
[i
])
158 comb
+= self
.sld_valid_o
[i
].eq(ld
.valid_o
)
160 # sort out valid: mask2 zero we ignore 2nd LD
161 with m
.If(mask2
== mzero
):
162 comb
+= self
.valid_o
.eq(self
.sld_valid_o
[0])
164 comb
+= self
.valid_o
.eq(self
.sld_valid_o
.all())
165 ## debug output -- output mask2 and mzero
166 ## guess second port is invalid
168 # all bits valid (including when data error occurs!) decode ld1/ld2
169 with m
.If(self
.valid_o
):
170 # errors cause error condition
171 comb
+= self
.ld_data_o
.err
.eq(ld1
.ld_o
.err | ld2
.ld_o
.err
)
173 # note that data from LD1 will be in *cache-line* byte position
174 # likewise from LD2 but we *know* it is at the start of the line
175 comb
+= self
.ld_data_o
.data
.eq((ld1
.ld_o
.data
>> (ashift1
*8)) |
176 (ld2
.ld_o
.data
<< (ashift2
*8)))
178 with m
.If(self
.is_st_i
):
179 # set busy flag -- required for unit test
# NOTE(review): in this store path the per-half valid is written to
# sld_valid_o (the load-side signal), yet valid_o further down is derived from
# sst_valid_o, which no visible line drives -- this looks like it should be
# sst_valid_o; confirm.
180 for i
, (ld
, mask
) in enumerate(((ld1
, mask1
),
182 valid
= Signal(name
="stvalid_i%d" % i
, reset_less
=True)
183 comb
+= valid
.eq(self
.valid_i
& self
.sst_valid_i
[i
])
184 comb
+= ld
.valid_i
.eq(valid
& (mask
!= mzero
))
185 comb
+= self
.sld_valid_o
[i
].eq(ld
.valid_o
)
186 comb
+= self
.sst_data_o
[i
].data
.eq(ld
.ld_o
.data
)
# NOTE(review): the whole st_data_i record (err and data fields) is shifted
# and assigned to the whole ld_i record, rather than st_data_i.data to
# ld_i.data -- confirm the err bit is meant to take part in the shift.
188 comb
+= ld1
.ld_i
.eq((self
.st_data_i
<< (ashift1
*8)) & mask1
)
189 comb
+= ld2
.ld_i
.eq((self
.st_data_i
>> (ashift2
*8)) & mask2
)
191 # sort out valid: mask2 zero we ignore 2nd LD
192 with m
.If(mask2
== mzero
):
193 comb
+= self
.valid_o
.eq(self
.sst_valid_o
[0])
195 comb
+= self
.valid_o
.eq(self
.sst_valid_o
.all())
197 # all bits valid (including when data error occurs!) decode ld1/ld2
198 with m
.If(self
.valid_o
):
199 # errors cause error condition
# NOTE(review): this drives st_data_i.err, a field of a record whose name
# marks it as an input of this module -- confirm this is intended.
200 comb
+= self
.st_data_i
.err
.eq(ld1
.ld_o
.err | ld2
.ld_o
.err
)
208 yield self
.ld_data_o
.err
209 yield self
.ld_data_o
.data
212 yield self
.sld_valid_i
214 yield self
.sld_data_i
[i
].err
215 yield self
.sld_data_i
[i
].data
225 data
= 0x0102030405060708A1A2A3A4A5A6A7A8
226 dlen
= 16 # data length in bytes
229 ldm
= ((1 << ld_len
)-1)
230 ldme
= byteExpand(ldm
)
231 dlm
= ((1 << dlen
)-1)
232 data
= data
& ldme
# truncate data to be tested, mask to within ld len
233 print("ldm", ldm
, hex(data
& ldme
))
234 print("dlm", dlm
, bin(addr
& dlm
))
236 dmask
= ldm
<< (addr
& dlm
)
237 print("dmask", bin(dmask
))
238 dmask1
= dmask
>> (1 << dlen
)
239 print("dmask1", bin(dmask1
))
240 dmask
= dmask
& ((1 << (1 << dlen
))-1)
241 print("dmask", bin(dmask
))
242 dmask1
= byteExpand(dmask1
)
243 dmask
= byteExpand(dmask
)
247 yield dut
.is_ld_i
.eq(1)
248 yield dut
.len_i
.eq(ld_len
)
249 yield dut
.addr_i
.eq(addr
)
250 yield dut
.valid_i
.eq(1)
253 valid_o
= yield dut
.valid_o
258 ld_data_o
= yield dut
.ld_data_o
.data
259 yield dut
.is_ld_i
.eq(0)
264 print(hex(ld_data_o
), hex(data
))
265 assert ld_data_o
== data
270 valid_i
= yield dut
.valid_i
275 shf
= (addr
& dlm
)*8 #shift bytes not bits
277 shfdata
= (data
<< shf
)
278 data1
= shfdata
& dmask
279 print("ld data1", hex(data
), hex(data1
), shf
,shf
/8.0, hex(dmask
))
281 data2
= (shfdata
>> 128) & dmask1
282 print("ld data2", 1 << dlen
, hex(data
>> (1 << dlen
)), hex(data2
))
283 yield dut
.sld_data_i
[0].data
.eq(data1
)
284 yield dut
.sld_valid_i
[0].eq(1)
286 yield dut
.sld_data_i
[1].data
.eq(data2
)
287 yield dut
.sld_valid_i
[1].eq(1)
290 sim
.add_sync_process(lds
)
291 sim
.add_sync_process(send_ld
)
293 prefix
= "ldst_splitter"
294 with sim
.write_vcd("%s.vcd" % prefix
, traces
=dut
.ports()):
298 if __name__
== '__main__':
299 dut
= LDSTSplitter(32, 48, 4)
300 vl
= rtlil
.convert(dut
, ports
=dut
.ports())
301 with
open("ldst_splitter.il", "w") as f
: