1 """Dcache
2
3 based on Anton Blanchard microwatt dcache.vhdl
4
5 """
6
7 from enum import Enum, unique
8
9 from nmigen import Module, Signal, Elaboratable,
10 Cat, Repl
11 from nmigen.cli import main
12 from nmigen.iocontrol import RecordObject
13 from nmigen.util import log2_int
14
15 from experiment.mem_types import LoadStore1ToDcacheType,
16 DcacheToLoadStore1Type,
17 MmuToDcacheType,
18 DcacheToMmuType
19
20 from experiment.wb_types import WB_ADDR_BITS, WB_DATA_BITS, WB_SEL_BITS,
21 WBAddrType, WBDataType, WBSelType,
22 WbMasterOut, WBSlaveOut,
23 WBMasterOutVector, WBSlaveOutVector,
24 WBIOMasterOut, WBIOSlaveOut
25
26
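# Geometry constants (ROW_BITS, TAG_BITS, WAY_BITS, ...) are referenced
# throughout this file but were not yet defined in this first attempt.
# The following is a sketch of plausible definitions, following the
# formulas in microwatt dcache.vhdl; treat names and values as assumptions
# until the full port settles (they mirror the per-instance parameters in
# Dcache.__init__ below).  Helper types (RowPerLineValidArray, HitWaySet)
# and the PLRU/CacheRam submodules used later are likewise still to be
# ported.
LINE_SIZE = 64       # Line size in bytes
NUM_LINES = 32       # Number of lines in a set
NUM_WAYS = 4         # Number of ways
TLB_SET_SIZE = 64    # L1 DTLB entries per set
TLB_NUM_WAYS = 2     # L1 DTLB number of sets
TLB_LG_PGSZ = 12     # L1 DTLB log_2(page_size)

REAL_ADDR_BITS = 56                   # real-address width, as in microwatt
ROW_SIZE = WB_DATA_BITS // 8          # bytes per BRAM row
ROW_PER_LINE = LINE_SIZE // ROW_SIZE  # BRAM rows per cache line
BRAM_ROWS = NUM_LINES * ROW_PER_LINE  # BRAM rows per way
ROW_BITS = log2_int(BRAM_ROWS)
ROW_LINE_BITS = log2_int(ROW_PER_LINE)
ROW_OFF_BITS = log2_int(ROW_SIZE)
LINE_OFF_BITS = log2_int(LINE_SIZE)
INDEX_BITS = log2_int(NUM_LINES)
SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)  # tag padded to a byte edge
WAY_BITS = log2_int(NUM_WAYS)
TLB_SET_BITS = log2_int(TLB_SET_SIZE)
TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
TLB_PTE_BITS = 64
TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS

# Note: several Signal() declarations below still use element counts
# (e.g. Signal(NUM_LINES)) where a log2_int bit width is probably intended.
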
# Record for storing permission, attribute, etc. bits from a PTE
class PermAttr(RecordObject):
    def __init__(self):
        super().__init__()
        self.reference = Signal()
        self.changed = Signal()
        self.nocache = Signal()
        self.priv = Signal()
        self.rd_perm = Signal()
        self.wr_perm = Signal()


def extract_perm_attr(pte):
    pa = PermAttr()
    pa.reference = pte[8]
    pa.changed = pte[7]
    pa.nocache = pte[5]
    pa.priv = pte[3]
    pa.rd_perm = pte[2]
    pa.wr_perm = pte[1]
    return pa
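# Usage sketch (an assumption based on tlb_search below): the returned
# record's fields are plain slices of the PTE, so it can sit on the
# right-hand side of a Record .eq():
#
#     comb += perm_attr.eq(extract_perm_attr(pte))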


# Type of operation on a "valid" input
@unique
class Op(Enum):
    OP_NONE = 0
    OP_BAD = 1           # NC cache hit, TLB miss, prot/RC failure
    OP_STCX_FAIL = 2     # conditional store w/o reservation
    OP_LOAD_HIT = 3      # Cache hit on load
    OP_LOAD_MISS = 4     # Load missing cache
    OP_LOAD_NC = 5       # Non-cachable load
    OP_STORE_HIT = 6     # Store hitting cache
    OP_STORE_MISS = 7    # Store missing cache


# Cache state machine
@unique
class State(Enum):
    IDLE = 0              # Normal load hit processing
    RELOAD_WAIT_ACK = 1   # Cache reload wait ack
    STORE_WAIT_ACK = 2    # Store wait ack
    NC_LOAD_WAIT_ACK = 3  # Non-cachable load wait ack


# Dcache operations:
#
# In order to make timing, we use the BRAMs with
# an output buffer, which means that the BRAM
# output is delayed by an extra cycle.
#
# Thus, the dcache has a 2-stage internal pipeline
# for cache hits with no stalls.
#
# All other operations are handled via stalling
# in the first stage.
#
# The second stage can thus complete a hit at the same
# time as the first stage emits a stall for a complex op.
#
# Stage 0 register, basically contains just the latched request
class RegStage0(RecordObject):
    def __init__(self):
        super().__init__()
        self.req = LoadStore1ToDcacheType()
        self.tlbie = Signal()
        self.doall = Signal()
        self.tlbld = Signal()
        self.mmu_req = Signal()  # indicates source of request


class MemAccessRequest(RecordObject):
    def __init__(self):
        super().__init__()
        # Op() alone is not a hardware value; Signal(Op) gives an
        # enum-shaped signal
        self.op = Signal(Op)
        self.valid = Signal()
        self.dcbz = Signal()
        self.real_addr = Signal(REAL_ADDR_BITS)
        self.data = Signal(64)
        self.byte_sel = Signal(8)
        self.hit_way = Signal(WAY_BITS)
        self.same_tag = Signal()
        self.mmu_req = Signal()


# First stage register, contains state for stage 1 of load hits
# and for the state machine used by all other operations
class RegStage1(RecordObject):
    def __init__(self):
        super().__init__()
        # Info about the request
        self.full = Signal()     # have uncompleted request
        self.mmu_req = Signal()  # request is from MMU
        self.req = MemAccessRequest()

        # Cache hit state
        self.hit_way = Signal(WAY_BITS)
        self.hit_load_valid = Signal()
        self.hit_index = Signal(NUM_LINES)
        self.cache_hit = Signal()

        # TLB hit state
        self.tlb_hit = Signal()
        self.tlb_hit_way = Signal(TLB_NUM_WAYS)
        self.tlb_hit_index = Signal(TLB_SET_SIZE)
        # 2-stage data buffer for data forwarded from writes to reads
        self.forward_data1 = Signal(64)
        self.forward_data2 = Signal(64)
        self.forward_sel1 = Signal(8)
        self.forward_valid1 = Signal()
        self.forward_way1 = Signal(WAY_BITS)
        self.forward_row1 = Signal(BRAM_ROWS)
        self.use_forward1 = Signal()
        self.forward_sel = Signal(8)

        # Cache miss state (reload state machine)
        self.state = Signal(State)
        self.dcbz = Signal()
        self.write_bram = Signal()
        self.write_tag = Signal()
        self.slow_valid = Signal()
        self.wb = WBMasterOut()
        self.reload_tag = Signal(TAG_BITS)
        self.store_way = Signal(WAY_BITS)
        self.store_row = Signal(BRAM_ROWS)
        self.store_index = Signal(NUM_LINES)
        self.end_row_ix = Signal(ROW_LINE_BITS)
        self.rows_valid = RowPerLineValidArray()
        self.acks_pending = Signal(3)
        self.inc_acks = Signal()
        self.dec_acks = Signal()

        # Signals to complete (possibly with error)
        self.ls_valid = Signal()
        self.ls_error = Signal()
        self.mmu_done = Signal()
        self.mmu_error = Signal()
        self.cache_paradox = Signal()

        # Signal to complete a failed stcx.
        self.stcx_fail = Signal()


# Reservation information
class Reservation(RecordObject):
    def __init__(self):
        super().__init__()
        self.valid = Signal()
        # TODO LINE_OFF_BITS is 6
        # VHDL: addr(63 downto LINE_OFF_BITS) -> the upper address bits
        self.addr = Signal(64 - LINE_OFF_BITS)


# Set associative dcache write-through
#
# TODO (in no specific order):
#
# * See list in icache.vhdl
# * Complete load misses on the cycle when WB data comes instead of
#   at the end of line (this requires dealing with requests coming in
#   while not idle...)
class Dcache(Elaboratable):
    def __init__(self):
        # TODO: make these parameters of Dcache at some point
        self.LINE_SIZE = 64     # Line size in bytes
        self.NUM_LINES = 32     # Number of lines in a set
        self.NUM_WAYS = 4       # Number of ways
        self.TLB_SET_SIZE = 64  # L1 DTLB entries per set
        self.TLB_NUM_WAYS = 2   # L1 DTLB number of sets
        self.TLB_LG_PGSZ = 12   # L1 DTLB log_2(page_size)
        self.LOG_LENGTH = 0     # Non-zero to enable log data collection

        self.d_in = LoadStore1ToDcacheType()
        self.d_out = DcacheToLoadStore1Type()

        self.m_in = MmuToDcacheType()
        self.m_out = DcacheToMmuType()

        self.stall_out = Signal()

        self.wb_out = WBMasterOut()
        self.wb_in = WBSlaveOut()

        self.log_out = Signal(20)

    # Latch the request in r0.req as long as we're not stalling
    def stage_0(self, m, d_in, m_in):
        comb = m.d.comb
        sync = m.d.sync

        # variable r : reg_stage_0_t;
        # (a VHDL variable assigned with := acts combinatorially)
        r = RegStage0()

        # begin
        # if rising_edge(clk) then
        # assert (d_in.valid and m_in.valid) = '0'
        #     report "request collision loadstore vs MMU";
        # (a Python assert cannot test signal values cycle by cycle; see
        # the note below)
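        # The VHDL assertion fires every clock edge, which a Python assert
        # cannot do.  A hedged sketch of a formal-verification equivalent
        # using nmigen.asserts (assumes the design is later run through a
        # formal flow):
        #
        #     from nmigen.asserts import Assert
        #     comb += Assert(~(d_in.valid & m_in.valid))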

        # if m_in.valid = '1' then
        with m.If(m_in.valid):
            # r.req.valid := '1';
            # r.req.load := not (m_in.tlbie or m_in.tlbld);
            # r.req.dcbz := '0';
            # r.req.nc := '0';
            # r.req.reserve := '0';
            # r.req.virt_mode := '0';
            # r.req.priv_mode := '1';
            # r.req.addr := m_in.addr;
            # r.req.data := m_in.pte;
            # r.req.byte_sel := (others => '1');
            # r.tlbie := m_in.tlbie;
            # r.doall := m_in.doall;
            # r.tlbld := m_in.tlbld;
            # r.mmu_req := '1';
            comb += r.req.valid.eq(1)
            comb += r.req.load.eq(~(m_in.tlbie | m_in.tlbld))
            comb += r.req.dcbz.eq(0)
            comb += r.req.nc.eq(0)
            comb += r.req.reserve.eq(0)
            comb += r.req.virt_mode.eq(0)
            comb += r.req.priv_mode.eq(1)
            comb += r.req.addr.eq(m_in.addr)
            comb += r.req.data.eq(m_in.pte)
            # (others => '1') is all-ones, not the integer 1
            comb += r.req.byte_sel.eq(Repl(1, 8))
            comb += r.tlbie.eq(m_in.tlbie)
            comb += r.doall.eq(m_in.doall)
            comb += r.tlbld.eq(m_in.tlbld)
            comb += r.mmu_req.eq(1)
        # else
        with m.Else():
            # r.req := d_in;
            # r.tlbie := '0';
            # r.doall := '0';
            # r.tlbld := '0';
            # r.mmu_req := '0';
            comb += r.req.eq(d_in)
            comb += r.tlbie.eq(0)
            comb += r.doall.eq(0)
            comb += r.tlbld.eq(0)
            comb += r.mmu_req.eq(0)
        # end if;
        # if rst = '1' then
        #     r0_full <= '0';
        # elsif r1.full = '0' or r0_full = '0' then
        # (the rst arm is covered by nmigen's sync-domain reset)
        with m.If(~r1.full | ~r0_full):
            # r0 <= r;
            # r0_full <= r.req.valid;
            sync += r0.eq(r)
            sync += r0_full.eq(r.req.valid)
        # end if;
        # end if;
        # end process;

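    # Assumption: r0, r0_full and r1 used above are module-level state
    # that a future elaborate() would create and pass in, e.g.:
    #
    #     r0 = RegStage0()
    #     r0_full = Signal()
    #     r1 = RegStage1()
    #
    # nmigen resets sync-domain signals automatically, which stands in
    # for the VHDL "if rst = '1' then r0_full <= '0'" arm above.
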
    # TLB
    # Operates in the second cycle on the request latched in r0.req.
    # TLB updates write the entry at the end of the second cycle.
    def tlb_read(self, m, m_in, d_in, r0_stall, tlb_valid_way,
                 tlb_tag_way, tlb_pte_way, dtlb_valid_bits,
                 dtlb_tags, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        # variable index : tlb_index_t;
        # variable addrbits :
        #     std_ulogic_vector(TLB_SET_BITS - 1 downto 0);
        index = Signal(TLB_SET_BITS)
        addrbits = Signal(TLB_SET_BITS)

        # begin
        # if rising_edge(clk) then
        # if m_in.valid = '1' then
        with m.If(m_in.valid):
            # addrbits := m_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
            #                       - 1 downto TLB_LG_PGSZ);
            comb += addrbits.eq(m_in.addr[
                TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
            ])
        # else
        with m.Else():
            # addrbits := d_in.addr(TLB_LG_PGSZ + TLB_SET_BITS
            #                       - 1 downto TLB_LG_PGSZ);
            comb += addrbits.eq(d_in.addr[
                TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
            ])
        # end if;

        # index := to_integer(unsigned(addrbits));
        # (a VHDL variable := is combinatorial, not clocked)
        comb += index.eq(addrbits)
        # -- If we have any op and the previous op isn't
        # -- finished, then keep the same output for next cycle.
        # if r0_stall = '0' then
        # If we have any op and the previous op isn't finished,
        # then keep the same output for next cycle.
        with m.If(~r0_stall):
            sync += tlb_valid_way.eq(dtlb_valid_bits[index])
            sync += tlb_tag_way.eq(dtlb_tags[index])
            sync += tlb_pte_way.eq(dtlb_ptes[index])
        # end if;
        # end if;
        # end process;

    # -- Generate TLB PLRUs
    # maybe_tlb_plrus: if TLB_NUM_WAYS > 1 generate
    # Generate TLB PLRUs
    def maybe_tlb_plrus(self, m, r1, tlb_plru_victim, acc, acc_en, lru):
        comb = m.d.comb
        sync = m.d.sync

        # a VHDL generate condition is elaboration-time, so plain Python
        # if/for rather than m.If
        if TLB_NUM_WAYS > 1:
            for i in range(TLB_SET_SIZE):
                # TLB PLRU interface
                tlb_plru = PLRU(TLB_WAY_BITS)
                m.submodules += tlb_plru
                tlb_plru_acc = Signal(TLB_WAY_BITS)
                tlb_plru_acc_en = Signal()
                tlb_plru_out = Signal(TLB_WAY_BITS)

                comb += tlb_plru.acc.eq(tlb_plru_acc)
                comb += tlb_plru.acc_en.eq(tlb_plru_acc_en)
                # lru is driven by the PLRU, so the copy runs this way
                comb += tlb_plru_out.eq(tlb_plru.lru)

                # PLRU interface
                with m.If(r1.tlb_hit_index == i):
                    comb += tlb_plru_acc_en.eq(r1.tlb_hit)
                with m.Else():
                    comb += tlb_plru_acc_en.eq(0)
                comb += tlb_plru_acc.eq(r1.tlb_hit_way)

                comb += tlb_plru_victim[i].eq(tlb_plru_out)

    def tlb_search(self, m, tlb_req_index, r0, tlb_valid_way, tlb_tag_way,
                   tlb_pte_way, pte, tlb_hit, valid_ra, perm_attr, ra):

        comb = m.d.comb
        sync = m.d.sync

        # variable hitway : tlb_way_t;
        # variable hit : std_ulogic;
        # variable eatag : tlb_tag_t;
        hitway = Signal(TLB_WAY_BITS)
        hit = Signal()
        eatag = Signal(TLB_EA_TAG_BITS)

        # begin
        # tlb_req_index <=
        #     to_integer(unsigned(r0.req.addr(
        #         TLB_LG_PGSZ + TLB_SET_BITS - 1 downto TLB_LG_PGSZ
        #     )));
        # hitway := 0;
        # hit := '0';
        # eatag := r0.req.addr(63 downto TLB_LG_PGSZ + TLB_SET_BITS);
        # for i in tlb_way_t loop
        #     if tlb_valid_way(i) = '1' and
        #         read_tlb_tag(i, tlb_tag_way) = eatag then
        #         hitway := i;
        #         hit := '1';
        #     end if;
        # end loop;
        # tlb_hit <= hit and r0_valid;
        # tlb_hit_way <= hitway;
        comb += tlb_req_index.eq(r0.req.addr[
            TLB_LG_PGSZ:TLB_LG_PGSZ + TLB_SET_BITS
        ])

        comb += eatag.eq(r0.req.addr[
            TLB_LG_PGSZ + TLB_SET_BITS:64
        ])

        for i in range(TLB_NUM_WAYS):
            # == binds looser than & in Python, so parenthesise
            with m.If(tlb_valid_way[i]
                      & (read_tlb_tag(i, tlb_tag_way) == eatag)):
                comb += hitway.eq(i)
                comb += hit.eq(1)

        comb += tlb_hit.eq(hit & r0_valid)
        comb += tlb_hit_way.eq(hitway)

        # if tlb_hit = '1' then
        with m.If(tlb_hit):
            # pte <= read_tlb_pte(hitway, tlb_pte_way);
            comb += pte.eq(read_tlb_pte(hitway, tlb_pte_way))
        # else
        with m.Else():
            # pte <= (others => '0');
            comb += pte.eq(0)
        # end if;
        # valid_ra <= tlb_hit or not r0.req.virt_mode;
        comb += valid_ra.eq(tlb_hit | ~r0.req.virt_mode)
        # if r0.req.virt_mode = '1' then
        with m.If(r0.req.virt_mode):
            # ra <= pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ) &
            #       r0.req.addr(TLB_LG_PGSZ - 1 downto ROW_OFF_BITS) &
            #       (ROW_OFF_BITS-1 downto 0 => '0');
            # perm_attr <= extract_perm_attr(pte);
            # (others => '0') is a zero constant, Const(0, ROW_OFF_BITS)
            comb += ra.eq(Cat(
                Const(0, ROW_OFF_BITS),
                r0.req.addr[ROW_OFF_BITS:TLB_LG_PGSZ],
                pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
            ))
            comb += perm_attr.eq(extract_perm_attr(pte))
        # else
        with m.Else():
            # ra <= r0.req.addr(
            #     REAL_ADDR_BITS - 1 downto ROW_OFF_BITS
            # ) & (ROW_OFF_BITS-1 downto 0 => '0');
            comb += ra.eq(Cat(
                Const(0, ROW_OFF_BITS),
                r0.req.addr[ROW_OFF_BITS:REAL_ADDR_BITS]
            ))

            # perm_attr <= real_mode_perm_attr;
            comb += perm_attr.reference.eq(1)
            comb += perm_attr.changed.eq(1)
            comb += perm_attr.priv.eq(1)
            comb += perm_attr.nocache.eq(0)
            comb += perm_attr.rd_perm.eq(1)
            comb += perm_attr.wr_perm.eq(1)
        # end if;
        # end process;

    def tlb_update(self, m, r0_valid, r0, dtlb_valid_bits, tlb_req_index,
                   tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way,
                   dtlb_tags, tlb_pte_way, dtlb_ptes):

        comb = m.d.comb
        sync = m.d.sync

        # variable tlbie : std_ulogic;
        # variable tlbwe : std_ulogic;
        # variable repl_way : tlb_way_t;
        # variable eatag : tlb_tag_t;
        # variable tagset : tlb_way_tags_t;
        # variable pteset : tlb_way_ptes_t;
        tlbie = Signal()
        tlbwe = Signal()
        repl_way = Signal(TLB_WAY_BITS)
        eatag = Signal(TLB_EA_TAG_BITS)
        tagset = Signal(TLB_TAG_WAY_BITS)
        pteset = Signal(TLB_PTE_WAY_BITS)

        # begin
        # if rising_edge(clk) then
        # tlbie := r0_valid and r0.tlbie;
        # tlbwe := r0_valid and r0.tlbld;
        # (VHDL variables are combinatorial; "tlbldoi" was a typo
        # for tlbld)
        comb += tlbie.eq(r0_valid & r0.tlbie)
        comb += tlbwe.eq(r0_valid & r0.tlbld)

        # if rst = '1' or (tlbie = '1' and r0.doall = '1') then
        # (the rst arm is covered by nmigen's sync-domain reset)
        with m.If(tlbie & r0.doall):
            # -- clear all valid bits at once
            # for i in tlb_index_t loop
            #     dtlb_valids(i) <= (others => '0');
            # end loop;
            # clear all valid bits at once
            for i in range(TLB_SET_SIZE):
                sync += dtlb_valid_bits[i].eq(0)

        # elsif tlbie = '1' then
        with m.Elif(tlbie):
            # if tlb_hit = '1' then
            with m.If(tlb_hit):
                # dtlb_valids(tlb_req_index)(tlb_hit_way) <= '0';
                sync += dtlb_valid_bits[tlb_req_index][tlb_hit_way].eq(0)
            # end if;
        # elsif tlbwe = '1' then
        with m.Elif(tlbwe):
            # if tlb_hit = '1' then
            with m.If(tlb_hit):
                # repl_way := tlb_hit_way;
                comb += repl_way.eq(tlb_hit_way)
            # else
            with m.Else():
                # repl_way := to_integer(unsigned(
                #     tlb_plru_victim(tlb_req_index)));
                comb += repl_way.eq(tlb_plru_victim[tlb_req_index])
            # end if;
            # eatag := r0.req.addr(
            #     63 downto TLB_LG_PGSZ + TLB_SET_BITS
            # );
            # tagset := tlb_tag_way;
            # write_tlb_tag(repl_way, tagset, eatag);
            # dtlb_tags(tlb_req_index) <= tagset;
            # pteset := tlb_pte_way;
            # write_tlb_pte(repl_way, pteset, r0.req.data);
            # dtlb_ptes(tlb_req_index) <= pteset;
            # dtlb_valids(tlb_req_index)(repl_way) <= '1';
            comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64])
            comb += tagset.eq(tlb_tag_way)
            comb += write_tlb_tag(repl_way, tagset, eatag)
            sync += dtlb_tags[tlb_req_index].eq(tagset)
            comb += pteset.eq(tlb_pte_way)
            comb += write_tlb_pte(repl_way, pteset, r0.req.data)
            sync += dtlb_ptes[tlb_req_index].eq(pteset)
            sync += dtlb_valid_bits[tlb_req_index][repl_way].eq(1)
        # end if;
        # end if;
        # end process;
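
    # The read_tlb_tag/write_tlb_tag/read_tlb_pte/write_tlb_pte helpers
    # used above are not yet ported.  A hedged module-level sketch,
    # mirroring microwatt's flattened per-set way vectors (bit ranges
    # are assumptions):
    #
    #     def read_tlb_tag(way, tags):
    #         return tags[way * TLB_EA_TAG_BITS:(way + 1) * TLB_EA_TAG_BITS]
    #
    #     def write_tlb_tag(way, tags, tag):
    #         return read_tlb_tag(way, tags).eq(tag)
    #
    #     def read_tlb_pte(way, ptes):
    #         return ptes[way * TLB_PTE_BITS:(way + 1) * TLB_PTE_BITS]
    #
    #     def write_tlb_pte(way, ptes, newpte):
    #         return read_tlb_pte(way, ptes).eq(newpte)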

    # -- Generate PLRUs
    # maybe_plrus: if NUM_WAYS > 1 generate
    # Generate PLRUs
    def maybe_plrus(self, m, r1, plru_victim):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # TODO learn translation of generate into nmigen @lkcl
        # plrus: for i in 0 to NUM_LINES-1 generate
        for i in range(NUM_LINES):
            # -- PLRU interface
            # signal plru_acc : std_ulogic_vector(WAY_BITS-1 downto 0);
            # signal plru_acc_en : std_ulogic;
            # signal plru_out : std_ulogic_vector(WAY_BITS-1 downto 0);
            plru = PLRU(WAY_BITS)
            m.submodules += plru
            plru_acc = Signal(WAY_BITS)
            plru_acc_en = Signal()
            plru_out = Signal(WAY_BITS)

            # begin
            # TODO learn translation of entity, generic map, port map in
            # nmigen @lkcl
            # plru : entity work.plru
            # generic map (
            #     BITS => WAY_BITS
            # )
            # port map (
            #     clk => clk,
            #     rst => rst,
            #     acc => plru_acc,
            #     acc_en => plru_acc_en,
            #     lru => plru_out
            # );
            comb += plru.acc.eq(plru_acc)
            comb += plru.acc_en.eq(plru_acc_en)
            # lru is driven by the PLRU, so the copy runs this way
            comb += plru_out.eq(plru.lru)

            # process(all)
            # begin
            # -- PLRU interface
            # if r1.hit_index = i then
            # PLRU interface
            with m.If(r1.hit_index == i):
                # plru_acc_en <= r1.cache_hit;
                comb += plru_acc_en.eq(r1.cache_hit)
            # else
            with m.Else():
                # plru_acc_en <= '0';
                comb += plru_acc_en.eq(0)
            # end if;
            # plru_acc <= std_ulogic_vector(to_unsigned(
            #     r1.hit_way, WAY_BITS
            # ));
            # plru_victim(i) <= plru_out;
            comb += plru_acc.eq(r1.hit_way)
            comb += plru_victim[i].eq(plru_out)
            # end process;
        # end generate;
        # end generate;
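
    # A hedged sketch of how the VHDL "plru: entity work.plru / generic
    # map / port map" inside a generate translates to nmigen, answering
    # the TODOs above.  PLRU is assumed to be a ported Elaboratable with
    # acc/acc_en/lru ports; each instance becomes a uniquely-named
    # submodule:
    #
    #     for i in range(NUM_LINES):
    #         plru = PLRU(WAY_BITS)              # generic map (BITS =>)
    #         setattr(m.submodules, "plru_%d" % i, plru)
    #         comb += plru.acc.eq(r1.hit_way)    # port map
    #         comb += plru.acc_en.eq(plru_acc_en)
    #         comb += plru_victim[i].eq(plru.lru)
    #
    # clk and rst need no port map: nmigen connects the sync domain's
    # clock and reset to submodules automatically.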

    # -- Cache tag RAM read port
    # cache_tag_read : process(clk)
    # Cache tag RAM read port
    def cache_tag_read(self, m, r0_stall, req_index, m_in, d_in,
                       cache_tag_set, cache_tags):

        comb = m.d.comb
        sync = m.d.sync

        # variable index : index_t;
        index = Signal(NUM_LINES)

        # begin
        # if rising_edge(clk) then
        # if r0_stall = '1' then
        with m.If(r0_stall):
            # index := req_index;
            comb += index.eq(req_index)
        # elsif m_in.valid = '1' then
        with m.Elif(m_in.valid):
            # index := get_index(m_in.addr);
            comb += index.eq(get_index(m_in.addr))
        # else
        with m.Else():
            # index := get_index(d_in.addr);
            comb += index.eq(get_index(d_in.addr))
        # end if;
        # cache_tag_set <= cache_tags(index);
        sync += cache_tag_set.eq(cache_tags[index])
        # end if;
        # end process;
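
    # get_index/get_row/get_tag (and the row helpers used by the state
    # machine below) are not yet ported either.  A hedged module-level
    # sketch following the microwatt definitions (bit ranges are
    # assumptions):
    #
    #     def get_index(addr):
    #         return addr[LINE_OFF_BITS:SET_SIZE_BITS]
    #
    #     def get_row(addr):
    #         return addr[ROW_OFF_BITS:SET_SIZE_BITS]
    #
    #     def get_row_of_line(row):
    #         return row[:ROW_LINE_BITS]
    #
    #     def get_tag(addr):
    #         return addr[SET_SIZE_BITS:REAL_ADDR_BITS]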

    # Cache request parsing and hit detection
    def dcache_request(self, m, r0, ra, req_index, req_row, req_tag,
                       r0_valid, r1, cache_valid_bits, replace_way,
                       use_forward1_next, use_forward2_next,
                       req_hit_way, plru_victim, rc_ok, perm_attr,
                       valid_ra, perm_ok, access_ok, req_op, req_ok,
                       r0_stall, m_in, early_req_row, d_in):

        comb = m.d.comb
        sync = m.d.sync

        # variable is_hit : std_ulogic;
        # variable hit_way : way_t;
        # variable op : op_t;
        # variable opsel : std_ulogic_vector(2 downto 0);
        # variable go : std_ulogic;
        # variable nc : std_ulogic;
        # variable s_hit : std_ulogic;
        # variable s_tag : cache_tag_t;
        # variable s_pte : tlb_pte_t;
        # variable s_ra : std_ulogic_vector(
        #     REAL_ADDR_BITS - 1 downto 0
        # );
        # variable hit_set : std_ulogic_vector(
        #     TLB_NUM_WAYS - 1 downto 0
        # );
        # variable hit_way_set : hit_way_set_t;
        # variable rel_matches : std_ulogic_vector(
        #     TLB_NUM_WAYS - 1 downto 0
        # );
        is_hit = Signal()
        hit_way = Signal(WAY_BITS)
        op = Signal(Op)
        opsel = Signal(3)
        go = Signal()
        nc = Signal()
        s_hit = Signal()
        s_tag = Signal(TAG_BITS)
        s_pte = Signal(TLB_PTE_BITS)
        s_ra = Signal(REAL_ADDR_BITS)
        hit_set = Signal(TLB_NUM_WAYS)
        hit_way_set = HitWaySet()
        rel_matches = Signal(TLB_NUM_WAYS)
        rel_match = Signal()

        # begin
        # -- Extract line, row and tag from request
        # req_index <= get_index(r0.req.addr);
        # req_row <= get_row(r0.req.addr);
        # req_tag <= get_tag(ra);
        #
        # go := r0_valid and not (r0.tlbie or r0.tlbld)
        #       and not r1.ls_error;
        # Extract line, row and tag from request
        comb += req_index.eq(get_index(r0.req.addr))
        comb += req_row.eq(get_row(r0.req.addr))
        comb += req_tag.eq(get_tag(ra))

        comb += go.eq(r0_valid & ~(r0.tlbie | r0.tlbld) & ~r1.ls_error)

        # hit_way := 0;
        # is_hit := '0';
        # rel_match := '0';
        # Test if pending request is a hit on any way
        # In order to make timing in virtual mode,
        # when we are using the TLB, we compare each
        # way with each of the real addresses from each way of
        # the TLB, and then decide later which match to use.

        # if r0.req.virt_mode = '1' then
        with m.If(r0.req.virt_mode):
            # rel_matches := (others => '0');
            comb += rel_matches.eq(0)
            # for j in tlb_way_t loop
            for j in range(TLB_NUM_WAYS):
                # hit_way_set(j) := 0;
                # s_hit := '0';
                # s_pte := read_tlb_pte(j, tlb_pte_way);
                # s_ra := s_pte(REAL_ADDR_BITS - 1 downto TLB_LG_PGSZ)
                #         & r0.req.addr(TLB_LG_PGSZ - 1 downto 0);
                # s_tag := get_tag(s_ra);
                comb += hit_way_set[j].eq(0)
                comb += s_hit.eq(0)
                comb += s_pte.eq(read_tlb_pte(j, tlb_pte_way))
                comb += s_ra.eq(Cat(
                    r0.req.addr[0:TLB_LG_PGSZ],
                    s_pte[TLB_LG_PGSZ:REAL_ADDR_BITS]
                ))
                comb += s_tag.eq(get_tag(s_ra))

                # for i in way_t loop
                for i in range(NUM_WAYS):
                    # if go = '1' and cache_valids(req_index)(i) = '1'
                    #     and read_tag(i, cache_tag_set) = s_tag
                    #     and tlb_valid_way(j) = '1' then
                    # (== binds looser than &, so parenthesise)
                    with m.If(go & cache_valid_bits[req_index][i]
                              & (read_tag(i, cache_tag_set) == s_tag)
                              & tlb_valid_way[j]):
                        # hit_way_set(j) := i;
                        # s_hit := '1';
                        comb += hit_way_set[j].eq(i)
                        comb += s_hit.eq(1)
                    # end if;
                # end loop;
                # hit_set(j) := s_hit;
                comb += hit_set[j].eq(s_hit)
                # if s_tag = r1.reload_tag then
                with m.If(s_tag == r1.reload_tag):
                    # rel_matches(j) := '1';
                    comb += rel_matches[j].eq(1)
                # end if;
            # end loop;
            # if tlb_hit = '1' then
            with m.If(tlb_hit):
                # is_hit := hit_set(tlb_hit_way);
                # hit_way := hit_way_set(tlb_hit_way);
                # rel_match := rel_matches(tlb_hit_way);
                comb += is_hit.eq(hit_set[tlb_hit_way])
                comb += hit_way.eq(hit_way_set[tlb_hit_way])
                comb += rel_match.eq(rel_matches[tlb_hit_way])
            # end if;
        # else
        with m.Else():
            # s_tag := get_tag(r0.req.addr);
            comb += s_tag.eq(get_tag(r0.req.addr))
            # for i in way_t loop
            for i in range(NUM_WAYS):
                # if go = '1' and cache_valids(req_index)(i) = '1' and
                #     read_tag(i, cache_tag_set) = s_tag then
                with m.If(go & cache_valid_bits[req_index][i]
                          & (read_tag(i, cache_tag_set) == s_tag)):
                    # hit_way := i;
                    # is_hit := '1';
                    comb += hit_way.eq(i)
                    comb += is_hit.eq(1)
                # end if;
            # end loop;
            # if s_tag = r1.reload_tag then
            with m.If(s_tag == r1.reload_tag):
                # rel_match := '1';
                comb += rel_match.eq(1)
            # end if;
        # end if;
        # req_same_tag <= rel_match;
        comb += req_same_tag.eq(rel_match)

        # if r1.state = RELOAD_WAIT_ACK and req_index = r1.store_index
        #     and rel_match = '1' then
        # See if the request matches the line currently being reloaded
        with m.If((r1.state == State.RELOAD_WAIT_ACK)
                  & (req_index == r1.store_index) & rel_match):
            # For a store, consider this a hit even if the row isn't
            # valid since it will be by the time we perform the store.
            # For a load, check the appropriate row valid bit.
            # is_hit :=
            #     not r0.req.load
            #     or r1.rows_valid(req_row mod ROW_PER_LINE);
            # hit_way := replace_way;
            comb += is_hit.eq(~r0.req.load
                              | r1.rows_valid[req_row % ROW_PER_LINE])
            comb += hit_way.eq(replace_way)
        # end if;

        # -- Whether to use forwarded data for a load or not
        # Whether to use forwarded data for a load or not
        # use_forward1_next <= '0';
        comb += use_forward1_next.eq(0)
        # if get_row(r1.req.real_addr) = req_row
        #     and r1.req.hit_way = hit_way then
        with m.If((get_row(r1.req.real_addr) == req_row)
                  & (r1.req.hit_way == hit_way)):
            # Only need to consider r1.write_bram here, since if we
            # are writing refill data here, then we don't have a
            # cache hit this cycle on the line being refilled.
            # (There is the possibility that the load following the
            # load miss that started the refill could be to the old
            # contents of the victim line, since it is a couple of
            # cycles after the refill starts before we see the updated
            # cache tag. In that case we don't use the bypass.)
            # use_forward1_next <= r1.write_bram;
            comb += use_forward1_next.eq(r1.write_bram)
        # end if;
        # use_forward2_next <= '0';
        comb += use_forward2_next.eq(0)
        # if r1.forward_row1 = req_row
        #     and r1.forward_way1 = hit_way then
        with m.If((r1.forward_row1 == req_row)
                  & (r1.forward_way1 == hit_way)):
            # use_forward2_next <= r1.forward_valid1;
            comb += use_forward2_next.eq(r1.forward_valid1)
        # end if;

        # The way that matched on a hit
        # req_hit_way <= hit_way;
        comb += req_hit_way.eq(hit_way)

        # The way to replace on a miss
        # if r1.write_tag = '1' then
        with m.If(r1.write_tag):
            # replace_way <= to_integer(unsigned(
            #     plru_victim(r1.store_index)
            # ));
            comb += replace_way.eq(plru_victim[r1.store_index])
        # else
        with m.Else():
            # replace_way <= r1.store_way;
            comb += replace_way.eq(r1.store_way)
        # end if;

        # work out whether we have permission for this access
        # NB we don't yet implement AMR, thus no KUAP
        # rc_ok <= perm_attr.reference and
        #          (r0.req.load or perm_attr.changed);
        # perm_ok <= (r0.req.priv_mode or not perm_attr.priv) and
        #            (perm_attr.wr_perm or (r0.req.load
        #            and perm_attr.rd_perm));
        # access_ok <= valid_ra and perm_ok and rc_ok;
        comb += rc_ok.eq(
            perm_attr.reference
            & (r0.req.load | perm_attr.changed)
        )
        comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv)
                           & (perm_attr.wr_perm
                              | (r0.req.load & perm_attr.rd_perm)))
        comb += access_ok.eq(valid_ra & perm_ok & rc_ok)
        # nc := r0.req.nc or perm_attr.nocache;
        # op := OP_NONE;
        # Combine the request and cache hit status to decide what
        # operation needs to be done
        comb += nc.eq(r0.req.nc | perm_attr.nocache)
        comb += op.eq(Op.OP_NONE)
        # if go = '1' then
        with m.If(go):
            # if access_ok = '0' then
            with m.If(~access_ok):
                # op := OP_BAD;
                comb += op.eq(Op.OP_BAD)
            # elsif cancel_store = '1' then
            with m.Elif(cancel_store):
                # op := OP_STCX_FAIL;
                comb += op.eq(Op.OP_STCX_FAIL)
            # else
            with m.Else():
                # opsel := r0.req.load & nc & is_hit;
                # (VHDL & concatenates MSB-first; nmigen Cat is LSB-first,
                # so is_hit lands in bit 0 and load in bit 2, matching
                # the case patterns below)
                comb += opsel.eq(Cat(is_hit, nc, r0.req.load))
                # case opsel is
                with m.Switch(opsel):
                    # when "101" => op := OP_LOAD_HIT;
                    # when "100" => op := OP_LOAD_MISS;
                    # when "110" => op := OP_LOAD_NC;
                    # when "001" => op := OP_STORE_HIT;
                    # when "000" => op := OP_STORE_MISS;
                    # when "010" => op := OP_STORE_MISS;
                    # when "011" => op := OP_BAD;
                    # when "111" => op := OP_BAD;
                    # when others => op := OP_NONE;
                    # (m.Case takes plain integer patterns)
                    with m.Case(0b101):
                        comb += op.eq(Op.OP_LOAD_HIT)
                    with m.Case(0b100):
                        comb += op.eq(Op.OP_LOAD_MISS)
                    with m.Case(0b110):
                        comb += op.eq(Op.OP_LOAD_NC)
                    with m.Case(0b001):
                        comb += op.eq(Op.OP_STORE_HIT)
                    with m.Case(0b000):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b010):
                        comb += op.eq(Op.OP_STORE_MISS)
                    with m.Case(0b011):
                        comb += op.eq(Op.OP_BAD)
                    with m.Case(0b111):
                        comb += op.eq(Op.OP_BAD)
                    with m.Default():
                        comb += op.eq(Op.OP_NONE)
                # end case;
            # end if;
        # end if;
        # req_op <= op;
        # req_go <= go;
        comb += req_op.eq(op)
        comb += req_go.eq(go)

        # Version of the row number that is valid one cycle earlier
        # in the cases where we need to read the cache data BRAM.
        # If we're stalling then we need to keep reading the last
        # row requested.
        # if r0_stall = '0' then
        with m.If(~r0_stall):
            # if m_in.valid = '1' then
            with m.If(m_in.valid):
                # early_req_row <= get_row(m_in.addr);
                comb += early_req_row.eq(get_row(m_in.addr))
            # else
            with m.Else():
                # early_req_row <= get_row(d_in.addr);
                comb += early_req_row.eq(get_row(d_in.addr))
            # end if;
        # else
        with m.Else():
            # early_req_row <= req_row;
            comb += early_req_row.eq(req_row)
        # end if;
        # end process;

    # Handle load-with-reservation and store-conditional instructions
    def reservation_comb(self, m, cancel_store, set_rsrv, clear_rsrv,
                         r0_valid, r0, reservation):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # cancel_store <= '0';
        # set_rsrv <= '0';
        # clear_rsrv <= '0';
        # if r0_valid = '1' and r0.req.reserve = '1' then
        with m.If(r0_valid & r0.req.reserve):
            # -- XXX generate alignment interrupt if address
            # -- is not aligned XXX or if r0.req.nc = '1'
            # if r0.req.load = '1' then
            # XXX generate alignment interrupt if address
            # is not aligned XXX or if r0.req.nc = '1'
            with m.If(r0.req.load):
                # -- load with reservation
                # set_rsrv <= '1';
                # load with reservation
                comb += set_rsrv.eq(1)
            # else
            with m.Else():
                # -- store conditional
                # clear_rsrv <= '1';
                # store conditional
                comb += clear_rsrv.eq(1)
                # if reservation.valid = '0' or r0.req.addr(63
                #     downto LINE_OFF_BITS) /= reservation.addr then
                with m.If(~reservation.valid
                          | (r0.req.addr[LINE_OFF_BITS:64]
                             != reservation.addr)):
                    # cancel_store <= '1';
                    comb += cancel_store.eq(1)
                # end if;
            # end if;
        # end if;
        # end process;

    def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv,
                        reservation, r0):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # if rising_edge(clk) then
        # if rst = '1' then
        #     reservation.valid <= '0';
        # (the rst arm is covered by nmigen's sync-domain reset)
        # elsif r0_valid = '1' and access_ok = '1' then
        with m.If(r0_valid & access_ok):
            # if clear_rsrv = '1' then
            with m.If(clear_rsrv):
                # reservation.valid <= '0';
                sync += reservation.valid.eq(0)
            # elsif set_rsrv = '1' then
            with m.Elif(set_rsrv):
                # reservation.valid <= '1';
                # reservation.addr <=
                #     r0.req.addr(63 downto LINE_OFF_BITS);
                sync += reservation.valid.eq(1)
                sync += reservation.addr.eq(
                    r0.req.addr[LINE_OFF_BITS:64]
                )
            # end if;
        # end if;
        # end if;
        # end process;

    # Return data for loads & completion control logic
    def writeback_control(self, m, r1, cache_out, d_out, m_out):

        comb = m.d.comb
        sync = m.d.sync

        # variable data_out : std_ulogic_vector(63 downto 0);
        # variable data_fwd : std_ulogic_vector(63 downto 0);
        # variable j : integer;
        data_out = Signal(64)
        data_fwd = Signal(64)

        # begin
        # -- Use the bypass if are reading the row that was
        # -- written 1 or 2 cycles ago, including for the
        # -- slow_valid = 1 case (i.e. completing a load
        # -- miss or a non-cacheable load).
        # if r1.use_forward1 = '1' then
        # Use the bypass if are reading the row that was
        # written 1 or 2 cycles ago, including for the
        # slow_valid = 1 case (i.e. completing a load
        # miss or a non-cacheable load).
        with m.If(r1.use_forward1):
            # data_fwd := r1.forward_data1;
            comb += data_fwd.eq(r1.forward_data1)
        # else
        with m.Else():
            # data_fwd := r1.forward_data2;
            comb += data_fwd.eq(r1.forward_data2)
        # end if;

        # data_out := cache_out(r1.hit_way);
        comb += data_out.eq(cache_out[r1.hit_way])

        # for i in 0 to 7 loop
        for i in range(8):
            # j := i * 8;
            # (j is an elaboration-time constant, not a Signal)
            j = i * 8

            # if r1.forward_sel(i) = '1' then
            with m.If(r1.forward_sel[i]):
                # data_out(j + 7 downto j) := data_fwd(j + 7 downto j);
                comb += data_out[j:j + 8].eq(data_fwd[j:j + 8])
            # end if;
        # end loop;
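
        # An alternative sketch for the byte mux above: nmigen's
        # Value.word_select picks byte lane i directly, avoiding the
        # manual j = i * 8 arithmetic:
        #
        #     for i in range(8):
        #         with m.If(r1.forward_sel[i]):
        #             comb += data_out.word_select(i, 8).eq(
        #                 data_fwd.word_select(i, 8))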

        # d_out.valid <= r1.ls_valid;
        # d_out.data <= data_out;
        # d_out.store_done <= not r1.stcx_fail;
        # d_out.error <= r1.ls_error;
        # d_out.cache_paradox <= r1.cache_paradox;
        comb += d_out.valid.eq(r1.ls_valid)
        comb += d_out.data.eq(data_out)
        comb += d_out.store_done.eq(~r1.stcx_fail)
        comb += d_out.error.eq(r1.ls_error)
        comb += d_out.cache_paradox.eq(r1.cache_paradox)

        # -- Outputs to MMU
        # m_out.done <= r1.mmu_done;
        # m_out.err <= r1.mmu_error;
        # m_out.data <= data_out;
        comb += m_out.done.eq(r1.mmu_done)
        comb += m_out.err.eq(r1.mmu_error)
        comb += m_out.data.eq(data_out)

        # -- We have a valid load or store hit or we just completed
        # -- a slow op such as a load miss, a NC load or a store
        # --
        # -- Note: the load hit is delayed by one cycle. However it
        # -- can still not collide with r.slow_valid (well unless I
        # -- miscalculated) because slow_valid can only be set on a
        # -- subsequent request and not on its first cycle (the state
        # -- machine must have advanced), which makes slow_valid
        # -- at least 2 cycles from the previous hit_load_valid.
        #
        # -- Sanity: Only one of these must be set in any given cycle
        # assert (r1.slow_valid and r1.stcx_fail) /= '1'
        #     report "unexpected slow_valid collision with stcx_fail"
        #     severity FAILURE;
        # assert ((r1.slow_valid or r1.stcx_fail) and r1.hit_load_valid)
        #     /= '1' report "unexpected hit_load_delayed collision with
        #     slow_valid" severity FAILURE;
        # We have a valid load or store hit or we just completed
        # a slow op such as a load miss, a NC load or a store
        #
        # Note: the load hit is delayed by one cycle. However it
        # can still not collide with r.slow_valid (well unless I
        # miscalculated) because slow_valid can only be set on a
        # subsequent request and not on its first cycle (the state
        # machine must have advanced), which makes slow_valid
        # at least 2 cycles from the previous hit_load_valid.

        # Sanity: Only one of these must be set in any given cycle
        # (per-cycle checks that a Python assert cannot express; see the
        # nmigen.asserts.Assert note in stage_0 above)
        #
        # assert (r1.slow_valid & r1.stcx_fail) != 1, \
        #     "unexpected slow_valid collision with stcx_fail"
        # assert ((r1.slow_valid | r1.stcx_fail) & r1.hit_load_valid) != 1, \
        #     "unexpected hit_load_delayed collision with slow_valid"

        # if r1.mmu_req = '0' then
        with m.If(~r1.mmu_req):
            # -- Request came from loadstore1...
            # -- Load hit case is the standard path
            # if r1.hit_load_valid = '1' then
            # Request came from loadstore1...
            # Load hit case is the standard path
            with m.If(r1.hit_load_valid):
                # report
                #     "completing load hit data=" & to_hstring(data_out);
                # (note: print() runs once at elaboration, not per clock
                # like the VHDL report)
                print(f"completing load hit data={data_out}")
            # end if;

            # -- error cases complete without stalling
            # if r1.ls_error = '1' then
            # error cases complete without stalling
            with m.If(r1.ls_error):
                # report "completing ld/st with error";
                print("completing ld/st with error")
            # end if;

            # -- Slow ops (load miss, NC, stores)
            # if r1.slow_valid = '1' then
            # Slow ops (load miss, NC, stores)
            with m.If(r1.slow_valid):
                # report
                #     "completing store or load miss data="
                #     & to_hstring(data_out);
                print(f"completing store or load miss data={data_out}")
            # end if;
        # else
        with m.Else():
            # -- Request came from MMU
            # if r1.hit_load_valid = '1' then
            # Request came from MMU
            with m.If(r1.hit_load_valid):
                # report "completing load hit to MMU, data="
                #     & to_hstring(m_out.data);
                print(f"completing load hit to MMU, data={m_out.data}")
            # end if;
            #
            # -- error cases complete without stalling
            # if r1.mmu_error = '1' then
            #     report "completing MMU ld with error";
            # error cases complete without stalling
            with m.If(r1.mmu_error):
                print("completing MMU ld with error")
            # end if;
            #
            # -- Slow ops (i.e. load miss)
            # if r1.slow_valid = '1' then
            # Slow ops (i.e. load miss)
            with m.If(r1.slow_valid):
                # report "completing MMU load miss, data="
                #     & to_hstring(m_out.data);
                print(f"completing MMU load miss, data={m_out.data}")
            # end if;
        # end if;
        # end process;

    # -- Generate a cache RAM for each way. This handles the normal
    # -- reads, writes from reloads and the special store-hit update
    # -- path as well.
    # --
    # -- Note: the BRAMs have an extra read buffer, meaning the output
    # -- is pipelined an extra cycle. This differs from the
    # -- icache. The writeback logic needs to take that into
    # -- account by using 1-cycle delayed signals for load hits.
    # --
    # rams: for i in 0 to NUM_WAYS-1 generate
    # Generate a cache RAM for each way. This handles the normal
    # reads, writes from reloads and the special store-hit update
    # path as well.
    #
    # Note: the BRAMs have an extra read buffer, meaning the output
    # is pipelined an extra cycle. This differs from the
    # icache. The writeback logic needs to take that into
    # account by using 1-cycle delayed signals for load hits.
    def rams(self, m, r1, early_req_row, cache_out, replace_way,
             wishbone_in):

        comb = m.d.comb
        sync = m.d.sync

        for i in range(NUM_WAYS):
            # signal do_read : std_ulogic;
            # signal rd_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
            # signal do_write : std_ulogic;
            # signal wr_addr : std_ulogic_vector(ROW_BITS-1 downto 0);
            # signal wr_data :
            #     std_ulogic_vector(wishbone_data_bits-1 downto 0);
            # signal wr_sel : std_ulogic_vector(ROW_SIZE-1 downto 0);
            # signal wr_sel_m : std_ulogic_vector(ROW_SIZE-1 downto 0);
            # signal dout : cache_row_t;
            do_read = Signal()
            rd_addr = Signal(ROW_BITS)
            do_write = Signal()
            wr_addr = Signal(ROW_BITS)
            wr_data = Signal(WB_DATA_BITS)
            wr_sel = Signal(ROW_SIZE)
            wr_sel_m = Signal(ROW_SIZE)
            _d_out = Signal(WB_DATA_BITS)

            # begin
            # way: entity work.cache_ram
            # generic map (
            #     ROW_BITS => ROW_BITS,
            #     WIDTH => wishbone_data_bits,
            #     ADD_BUF => true
            # )
            # port map (
            #     clk => clk,
            #     rd_en => do_read,
            #     rd_addr => rd_addr,
            #     rd_data => dout,
            #     wr_sel => wr_sel_m,
            #     wr_addr => wr_addr,
            #     wr_data => wr_data
            # );
            # process(all)
            way = CacheRam(ROW_BITS, WB_DATA_BITS, True)
            m.submodules += way
            comb += way.rd_en.eq(do_read)
            comb += way.rd_addr.eq(rd_addr)
            # rd_data is driven by the RAM, so the copy runs this way
            comb += _d_out.eq(way.rd_data)
            comb += way.wr_sel.eq(wr_sel_m)
            comb += way.wr_addr.eq(wr_addr)
            comb += way.wr_data.eq(wr_data)

            # begin
            # -- Cache hit reads
            # do_read <= '1';
            # rd_addr <=
            #     std_ulogic_vector(to_unsigned(early_req_row, ROW_BITS));
            # cache_out(i) <= dout;
            # Cache hit reads
            comb += do_read.eq(1)
            comb += rd_addr.eq(early_req_row)
            comb += cache_out[i].eq(_d_out)

            # -- Write mux:
            # --
            # -- Defaults to wishbone read responses (cache refill)
            # --
            # -- For timing, the mux on wr_data/sel/addr is not
            # -- dependent on anything other than the current state.
            # Write mux:
            #
            # Defaults to wishbone read responses (cache refill)
            #
            # For timing, the mux on wr_data/sel/addr is not
            # dependent on anything other than the current state.
            # wr_sel_m <= (others => '0');
            comb += wr_sel_m.eq(0)

            # do_write <= '0';
            comb += do_write.eq(0)
            # if r1.write_bram = '1' then
            with m.If(r1.write_bram):
                # -- Write store data to BRAM. This happens one
                # -- cycle after the store is in r0.
                # Write store data to BRAM. This happens one
                # cycle after the store is in r0.
                # wr_data <= r1.req.data;
                # wr_sel <= r1.req.byte_sel;
                # wr_addr <= std_ulogic_vector(to_unsigned(
                #     get_row(r1.req.real_addr), ROW_BITS
                # ));
                comb += wr_data.eq(r1.req.data)
                comb += wr_sel.eq(r1.req.byte_sel)
                comb += wr_addr.eq(get_row(r1.req.real_addr))

                # if i = r1.req.hit_way then
                with m.If(i == r1.req.hit_way):
                    # do_write <= '1';
                    comb += do_write.eq(1)
                # end if;
            # else
            with m.Else():
                # -- Otherwise, we might be doing a reload or a DCBZ
                # if r1.dcbz = '1' then
                # Otherwise, we might be doing a reload or a DCBZ
                with m.If(r1.dcbz):
                    # wr_data <= (others => '0');
                    comb += wr_data.eq(0)
                # else
                with m.Else():
                    # wr_data <= wishbone_in.dat;
                    comb += wr_data.eq(wishbone_in.dat)
                # end if;

                # wr_addr <= std_ulogic_vector(to_unsigned(
                #     r1.store_row, ROW_BITS
                # ));
                # wr_sel <= (others => '1');
                comb += wr_addr.eq(r1.store_row)
                comb += wr_sel.eq(Repl(1, ROW_SIZE))

                # if r1.state = RELOAD_WAIT_ACK and
                #     wishbone_in.ack = '1' and replace_way = i then
                with m.If((r1.state == State.RELOAD_WAIT_ACK)
                          & wishbone_in.ack & (replace_way == i)):
                    # do_write <= '1';
                    comb += do_write.eq(1)
                # end if;
            # end if;

            # -- Mask write selects with do_write since BRAM
            # -- doesn't have a global write-enable
            # if do_write = '1' then
            # Mask write selects with do_write since BRAM
            # doesn't have a global write-enable
            with m.If(do_write):
                # wr_sel_m <= wr_sel;
                comb += wr_sel_m.eq(wr_sel)
            # end if;
            # end process;
        # end generate;

    # Cache hit synchronous machine for the easy case.
    # This handles load hits.
    # It also handles error cases (TLB miss, cache paradox)
    def dcache_fast_hit(self, m, req_op, r0_valid, r1):

        comb = m.d.comb
        sync = m.d.sync

        # begin
        # if rising_edge(clk) then
        # if req_op /= OP_NONE then
        with m.If(req_op != Op.OP_NONE):
            # report "op:" & op_t'image(req_op) &
            #     " addr:" & to_hstring(r0.req.addr) &
            #     " nc:" & std_ulogic'image(r0.req.nc) &
            #     " idx:" & integer'image(req_index) &
            #     " tag:" & to_hstring(req_tag) &
            #     " way: " & integer'image(req_hit_way);
            print(f"op:{req_op} addr:{r0.req.addr} nc:{r0.req.nc}"
                  f" idx:{req_index} tag:{req_tag} way:{req_hit_way}")
        # end if;
        # if r0_valid = '1' then
        with m.If(r0_valid):
            # r1.mmu_req <= r0.mmu_req;
            sync += r1.mmu_req.eq(r0.mmu_req)
        # end if;

        # -- Fast path for load/store hits.
        # -- Set signals for the writeback controls.
        # r1.hit_way <= req_hit_way;
        # r1.hit_index <= req_index;
        # Fast path for load/store hits.
        # Set signals for the writeback controls.
        sync += r1.hit_way.eq(req_hit_way)
        sync += r1.hit_index.eq(req_index)

        # if req_op = OP_LOAD_HIT then
        with m.If(req_op == Op.OP_LOAD_HIT):
            # r1.hit_load_valid <= '1';
            sync += r1.hit_load_valid.eq(1)
        # else
        with m.Else():
            # r1.hit_load_valid <= '0';
            sync += r1.hit_load_valid.eq(0)
        # end if;

        # if req_op = OP_LOAD_HIT or req_op = OP_STORE_HIT then
        with m.If((req_op == Op.OP_LOAD_HIT)
                  | (req_op == Op.OP_STORE_HIT)):
            # r1.cache_hit <= '1';
            sync += r1.cache_hit.eq(1)
        # else
        with m.Else():
            # r1.cache_hit <= '0';
            sync += r1.cache_hit.eq(0)
        # end if;

        # if req_op = OP_BAD then
        with m.If(req_op == Op.OP_BAD):
            # report "Signalling ld/st error valid_ra=" &
            #     std_ulogic'image(valid_ra) & " rc_ok=" &
            #     std_ulogic'image(rc_ok) & " perm_ok=" &
            #     std_ulogic'image(perm_ok);
            print(f"Signalling ld/st error valid_ra={valid_ra}"
                  f" rc_ok={rc_ok} perm_ok={perm_ok}")

            # r1.ls_error <= not r0.mmu_req;
            # r1.mmu_error <= r0.mmu_req;
            # r1.cache_paradox <= access_ok;
            sync += r1.ls_error.eq(~r0.mmu_req)
            sync += r1.mmu_error.eq(r0.mmu_req)
            sync += r1.cache_paradox.eq(access_ok)
        # else
        with m.Else():
            # r1.ls_error <= '0';
            # r1.mmu_error <= '0';
            # r1.cache_paradox <= '0';
            sync += r1.ls_error.eq(0)
            sync += r1.mmu_error.eq(0)
            sync += r1.cache_paradox.eq(0)
        # end if;
        #
        # if req_op = OP_STCX_FAIL then
        with m.If(req_op == Op.OP_STCX_FAIL):
            # r1.stcx_fail <= '1';
            sync += r1.stcx_fail.eq(1)
        # else
        with m.Else():
            # r1.stcx_fail <= '0';
            sync += r1.stcx_fail.eq(0)
        # end if;
        #
        # -- Record TLB hit information for updating TLB PLRU
        # r1.tlb_hit <= tlb_hit;
        # r1.tlb_hit_way <= tlb_hit_way;
        # r1.tlb_hit_index <= tlb_req_index;
        # Record TLB hit information for updating TLB PLRU
        sync += r1.tlb_hit.eq(tlb_hit)
        sync += r1.tlb_hit_way.eq(tlb_hit_way)
        sync += r1.tlb_hit_index.eq(tlb_req_index)
        # end if;
        # end process;

    # Memory accesses are handled by this state machine:
    #
    # * Cache load miss/reload (in conjunction with "rams")
    # * Load hits for non-cachable forms
    # * Stores (the collision case is handled in "rams")
    #
    # All wishbone requests generation is done here.
    # This machine operates at stage 1.
    def dcache_slow(self, m, r1, use_forward1_next, cache_valid_bits, r0,
                    r0_valid, req_op, cache_tag, req_go, ra, wb_in):

        comb = m.d.comb
        sync = m.d.sync

        # variable stbs_done : boolean;
        # variable req : mem_access_request_t;
        # variable acks : unsigned(2 downto 0);
        stbs_done = Signal()
        req = MemAccessRequest()
        acks = Signal(3)

        # begin
        # if rising_edge(clk) then
        # r1.use_forward1 <= use_forward1_next;
        # r1.forward_sel <= (others => '0');
        sync += r1.use_forward1.eq(use_forward1_next)
        sync += r1.forward_sel.eq(0)

        # if use_forward1_next = '1' then
        with m.If(use_forward1_next):
            # r1.forward_sel <= r1.req.byte_sel;
            sync += r1.forward_sel.eq(r1.req.byte_sel)
        # elsif use_forward2_next = '1' then
        with m.Elif(use_forward2_next):
            # r1.forward_sel <= r1.forward_sel1;
            sync += r1.forward_sel.eq(r1.forward_sel1)
        # end if;

        # r1.forward_data2 <= r1.forward_data1;
        sync += r1.forward_data2.eq(r1.forward_data1)

        # if r1.write_bram = '1' then
        with m.If(r1.write_bram):
            # r1.forward_data1 <= r1.req.data;
            # r1.forward_sel1 <= r1.req.byte_sel;
            # r1.forward_way1 <= r1.req.hit_way;
            # r1.forward_row1 <= get_row(r1.req.real_addr);
            # r1.forward_valid1 <= '1';
            sync += r1.forward_data1.eq(r1.req.data)
            sync += r1.forward_sel1.eq(r1.req.byte_sel)
            sync += r1.forward_way1.eq(r1.req.hit_way)
            sync += r1.forward_row1.eq(get_row(r1.req.real_addr))
            sync += r1.forward_valid1.eq(1)
        # else
        with m.Else():
            # if r1.dcbz = '1' then
            with m.If(r1.dcbz):
                # r1.forward_data1 <= (others => '0');
                sync += r1.forward_data1.eq(0)
            # else
            with m.Else():
                # r1.forward_data1 <= wishbone_in.dat;
                sync += r1.forward_data1.eq(wb_in.dat)
            # end if;

            # r1.forward_sel1 <= (others => '1');
            # r1.forward_way1 <= replace_way;
            # r1.forward_row1 <= r1.store_row;
            # r1.forward_valid1 <= '0';
            sync += r1.forward_sel1.eq(Repl(1, 8))
            sync += r1.forward_way1.eq(replace_way)
            sync += r1.forward_row1.eq(r1.store_row)
            sync += r1.forward_valid1.eq(0)
        # end if;

        # -- On reset, clear all valid bits to force misses
        # if rst = '1' then
        # On reset, clear all valid bits to force misses
        # (nmigen normally resets sync signals itself; ResetSignal() is
        # used here to mirror the explicit VHDL reset arm)
        with m.If(ResetSignal()):
            # for i in index_t loop
            for i in range(NUM_LINES):
                # cache_valids(i) <= (others => '0');
                sync += cache_valid_bits[i].eq(0)
            # end loop;

            # r1.state <= IDLE;
            # r1.full <= '0';
            # r1.slow_valid <= '0';
            # r1.wb.cyc <= '0';
            # r1.wb.stb <= '0';
            # r1.ls_valid <= '0';
            # r1.mmu_done <= '0';
            sync += r1.state.eq(State.IDLE)
            sync += r1.full.eq(0)
            sync += r1.slow_valid.eq(0)
            sync += r1.wb.cyc.eq(0)
            sync += r1.wb.stb.eq(0)
            sync += r1.ls_valid.eq(0)
            sync += r1.mmu_done.eq(0)

            # -- Not useful normally but helps avoiding
            # -- tons of sim warnings
            # Not useful normally but helps avoiding
            # tons of sim warnings
            # r1.wb.adr <= (others => '0');
            sync += r1.wb.adr.eq(0)
        # else
        with m.Else():
            # -- One cycle pulses reset
            # r1.slow_valid <= '0';
            # r1.write_bram <= '0';
            # r1.inc_acks <= '0';
            # r1.dec_acks <= '0';
            #
            # r1.ls_valid <= '0';
            # -- complete tlbies and TLB loads in the third cycle
            # r1.mmu_done <= r0_valid and (r0.tlbie or r0.tlbld);
            # One cycle pulses reset
            sync += r1.slow_valid.eq(0)
            sync += r1.write_bram.eq(0)
            sync += r1.inc_acks.eq(0)
            sync += r1.dec_acks.eq(0)

            sync += r1.ls_valid.eq(0)
            # complete tlbies and TLB loads in the third cycle
            sync += r1.mmu_done.eq(r0_valid & (r0.tlbie | r0.tlbld))

            # if req_op = OP_LOAD_HIT or req_op = OP_STCX_FAIL then
            with m.If((req_op == Op.OP_LOAD_HIT)
                      | (req_op == Op.OP_STCX_FAIL)):
                # if r0.mmu_req = '0' then
                with m.If(~r0.mmu_req):
                    # r1.ls_valid <= '1';
                    sync += r1.ls_valid.eq(1)
                # else
                with m.Else():
                    # r1.mmu_done <= '1';
                    sync += r1.mmu_done.eq(1)
                # end if;
            # end if;

            # if r1.write_tag = '1' then
            with m.If(r1.write_tag):
                # -- Store new tag in selected way
                # for i in 0 to NUM_WAYS-1 loop
                # Store new tag in selected way
                for i in range(NUM_WAYS):
                    # if i = replace_way then
                    with m.If(i == replace_way):
                        # cache_tags(r1.store_index)(
                        #     (i + 1) * TAG_WIDTH - 1
                        #     downto i * TAG_WIDTH
                        # ) <=
                        # (TAG_WIDTH - 1 downto TAG_BITS => '0')
                        #     & r1.reload_tag;
                        # (zero-extend reload_tag to TAG_WIDTH; the VHDL
                        # "&" is concatenation, not a bitwise and)
                        sync += cache_tag[r1.store_index][
                            i * TAG_WIDTH:(i + 1) * TAG_WIDTH
                        ].eq(Cat(r1.reload_tag,
                                 Const(0, TAG_WIDTH - TAG_BITS)))
                    # end if;
                # end loop;
                # r1.store_way <= replace_way;
                # r1.write_tag <= '0';
                sync += r1.store_way.eq(replace_way)
                sync += r1.write_tag.eq(0)
            # end if;

            # -- Take request from r1.req if there is one there,
            # -- else from req_op, ra, etc.
            # if r1.full = '1' then
            # Take request from r1.req if there is one there,
            # else from req_op, ra, etc.
            # (req is a VHDL variable, hence combinatorial)
            with m.If(r1.full):
                # req := r1.req;
                comb += req.eq(r1.req)
            # else
            with m.Else():
                # req.op := req_op;
                # req.valid := req_go;
                # req.mmu_req := r0.mmu_req;
                # req.dcbz := r0.req.dcbz;
                # req.real_addr := ra;
                comb += req.op.eq(req_op)
                comb += req.valid.eq(req_go)
                comb += req.mmu_req.eq(r0.mmu_req)
                comb += req.dcbz.eq(r0.req.dcbz)
                comb += req.real_addr.eq(ra)

                # -- Force data to 0 for dcbz
                # if r0.req.dcbz = '0' then
                with m.If(~r0.req.dcbz):
                    # req.data := r0.req.data;
                    comb += req.data.eq(r0.req.data)
                # else
                with m.Else():
                    # req.data := (others => '0');
                    comb += req.data.eq(0)
                # end if;

                # -- Select all bytes for dcbz
                # -- and for cacheable loads
                # if r0.req.dcbz = '1'
                #     or (r0.req.load = '1' and r0.req.nc = '0') then
                # Select all bytes for dcbz
                # and for cacheable loads
                with m.If(r0.req.dcbz | (r0.req.load & ~r0.req.nc)):
                    # req.byte_sel := (others => '1');
                    comb += req.byte_sel.eq(Repl(1, 8))
                # else
                with m.Else():
                    # req.byte_sel := r0.req.byte_sel;
                    comb += req.byte_sel.eq(r0.req.byte_sel)
                # end if;

                # req.hit_way := req_hit_way;
                # req.same_tag := req_same_tag;
                comb += req.hit_way.eq(req_hit_way)
                comb += req.same_tag.eq(req_same_tag)

                # -- Store the incoming request from r0,
                # -- if it is a slow request
                # -- Note that r1.full = 1 implies req_op = OP_NONE
                # if req_op = OP_LOAD_MISS or req_op = OP_LOAD_NC
                #     or req_op = OP_STORE_MISS
                #     or req_op = OP_STORE_HIT then
                # Store the incoming request from r0,
                # if it is a slow request
                # Note that r1.full = 1 implies req_op = OP_NONE
                with m.If((req_op == Op.OP_LOAD_MISS)
                          | (req_op == Op.OP_LOAD_NC)
                          | (req_op == Op.OP_STORE_MISS)
                          | (req_op == Op.OP_STORE_HIT)):
                    # r1.req <= req;
                    # r1.full <= '1';
                    sync += r1.req.eq(req)
                    sync += r1.full.eq(1)
                # end if;
            # end if;
            #
            # -- Main state machine
            # case r1.state is
            # Main state machine
            with m.Switch(r1.state):

                # when IDLE =>
                with m.Case(State.IDLE):
                    # r1.wb.adr <= req.real_addr(
                    #     r1.wb.adr'left downto 0
                    # );
                    # r1.wb.sel <= req.byte_sel;
                    # r1.wb.dat <= req.data;
                    # r1.dcbz <= req.dcbz;
                    #
                    # -- Keep track of our index and way
                    # -- for subsequent stores.
                    # r1.store_index <= get_index(req.real_addr);
                    # r1.store_row <= get_row(req.real_addr);
                    # r1.end_row_ix <=
                    #     get_row_of_line(get_row(req.real_addr)) - 1;
                    # r1.reload_tag <= get_tag(req.real_addr);
                    # r1.req.same_tag <= '1';
                    sync += r1.wb.adr.eq(req.real_addr[0:len(r1.wb.adr)])
                    sync += r1.wb.sel.eq(req.byte_sel)
                    sync += r1.wb.dat.eq(req.data)
                    sync += r1.dcbz.eq(req.dcbz)

                    # Keep track of our index and way
                    # for subsequent stores.
                    sync += r1.store_index.eq(get_index(req.real_addr))
                    sync += r1.store_row.eq(get_row(req.real_addr))
                    sync += r1.end_row_ix.eq(
                        get_row_of_line(get_row(req.real_addr)) - 1
                    )
                    sync += r1.reload_tag.eq(get_tag(req.real_addr))
                    sync += r1.req.same_tag.eq(1)

                    # if req.op = OP_STORE_HIT then
                    with m.If(req.op == Op.OP_STORE_HIT):
                        # r1.store_way <= req.hit_way;
                        sync += r1.store_way.eq(req.hit_way)
                    # end if;

                    # -- Reset per-row valid bits,
                    # -- ready for handling OP_LOAD_MISS
                    # for i in 0 to ROW_PER_LINE - 1 loop
                    # Reset per-row valid bits,
                    # ready for handling OP_LOAD_MISS
                    for i in range(ROW_PER_LINE):
                        # r1.rows_valid(i) <= '0';
                        sync += r1.rows_valid[i].eq(0)
                    # end loop;

                    # case req.op is
                    with m.Switch(req.op):
                        # when OP_LOAD_HIT =>
                        with m.Case(Op.OP_LOAD_HIT):
                            # -- stay in IDLE state
                            # stay in IDLE state
                            pass

                        # when OP_LOAD_MISS =>
                        with m.Case(Op.OP_LOAD_MISS):
                            # -- Normal load cache miss,
                            # -- start the reload machine
                            # report "cache miss real addr:" &
                            #     to_hstring(req.real_addr) & " idx:" &
                            #     integer'image(get_index(req.real_addr)) &
                            #     " tag:" & to_hstring(get_tag(req.real_addr));
                            # Normal load cache miss,
                            # start the reload machine
                            print(f"cache miss real addr:"
                                  f"{req.real_addr}"
                                  f" idx:{get_index(req.real_addr)}"
                                  f" tag:{get_tag(req.real_addr)}")
1773
1774 # -- Start the wishbone cycle
1775 # r1.wb.we <= '0';
1776 # r1.wb.cyc <= '1';
1777 # r1.wb.stb <= '1';
1778 # Start the wishbone cycle
1779 sync += r1.wb.we.eq(0)
1780 sync += r1.wb.cyc.eq(1)
1781 sync += r1.wb.stb.eq(1)
1782
1783 # -- Track that we had one request sent
1784 # r1.state <= RELOAD_WAIT_ACK;
1785 # r1.write_tag <= '1';
1786 # Track that we had one request sent
1787 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1788 sync += r1.write_tag.eq(1)
1789
1790 # when OP_LOAD_NC =>
1791 with m.Case(Op.OP_LOAD_NC):
1792 # r1.wb.cyc <= '1';
1793 # r1.wb.stb <= '1';
1794 # r1.wb.we <= '0';
1795 # r1.state <= NC_LOAD_WAIT_ACK;
1796 sync += r1.wb.cyc.eq(1)
1797 sync += r1.wb.stb.eq(1)
1798 sync += r1.wb.we.eq(0)
1799 sync += r1.state.eq(State.NC_LOAD_WAIT_ACK)
1800
1801 # when OP_STORE_HIT | OP_STORE_MISS =>
1802 with m.Case(Op.OP_STORE_HIT
1803 | Op.OP_STORE_MISS):
1804 # if req.dcbz = '0' then
1805 with m.If(~req.bcbz):
1806 # r1.state <= STORE_WAIT_ACK;
1807 # r1.acks_pending <= to_unsigned(1, 3);
1808 # r1.full <= '0';
1809 # r1.slow_valid <= '1';
1810 sync += r1.state.eq(
1811 State.STORE_WAIT_ACK
1812 )
1813 sync += r1.acks_pending.eq(
1814 '''TODO to_unsignes(1,3)'''
1815 )
1816 sync += r1.full.eq(0)
1817 sync += r1.slow_valid.eq(1)
1818
1819 # if req.mmu_req = '0' then
1820 with m.If(~req.mmu_req):
1821 # r1.ls_valid <= '1';
1822 sync += r1.ls_valid.eq(1)
1823 # else
1824 with m.Else():
1825 # r1.mmu_done <= '1';
1826 sync += r1.mmu_done.eq(1)
1827 # end if;
1828
1829 # if req.op = OP_STORE_HIT then
1830 with m.If(req.op == Op.OP_STORE_HIT):
1831 # r1.write_bram <= '1';
1832 sync += r1.write_bram.eq(1)
1833 # end if;
1834
1835 # else
1836 with m.Else():
1837 # -- dcbz is handled much like a load
1838 # -- miss except that we are writing
1839 # -- to memory instead of reading
1840 # r1.state <= RELOAD_WAIT_ACK;
1841 # dcbz is handled much like a load
1842 # miss except that we are writing
1843 # to memory instead of reading
1844 sync += r1.state.eq(State.RELOAD_WAIT_ACK)
1845
1846 # if req.op = OP_STORE_MISS then
1847 with m.If(req.op == Op.OP_STORE_MISS):
1848 # r1.write_tag <= '1';
1849 sync += r1.write_tag.eq(1)
1850 # end if;
1851 # end if;
1852
1853 # r1.wb.we <= '1';
1854 # r1.wb.cyc <= '1';
1855 # r1.wb.stb <= '1';
1856 sync += r1.wb.we.eq(1)
1857 sync += r1.wb.cyc.eq(1)
1858 sync += r1.wb.stb.eq(1)
1859
1860 # -- OP_NONE and OP_BAD do nothing
1861 # -- OP_BAD & OP_STCX_FAIL were handled above already
1862 # when OP_NONE =>
1863 # when OP_BAD =>
1864 # when OP_STCX_FAIL =>
1865 # OP_NONE and OP_BAD do nothing
1866 # OP_BAD & OP_STCX_FAIL were
1867 # handled above already
1868 with m.Case(Op.OP_NONE):
1869 pass
1870
1871 with m.Case(Op.OP_BAD):
1872 pass
1873
1874 with m.Case(Op.OP_STCX_FAIL):
1875 pass
1876 # end case;
1877
1878 # when RELOAD_WAIT_ACK =>
1879 with m.Case(State.RELOAD_WAIT_ACK):
1880 # -- Requests are all sent if stb is 0
1881 # stbs_done := r1.wb.stb = '0';
1882 # Requests are all sent if stb is 0
1883 sync += stbs_done.eq(~r1.wb.stb)
1884
1885 # -- If we are still sending requests,
1886 # -- was one accepted?
1887 # if wishbone_in.stall = '0' and not stbs_done then
1888 # If we are still sending requests,
1889 # was one accepted?
1890 with m.If(~wb_in.stall & ~stbs_done):
1891 # -- That was the last word ? We are done sending.
1892 # -- Clear stb and set stbs_done so we can handle
1893 # -- an eventual last ack on the same cycle.
1894 # if is_last_row_addr(
1895 # r1.wb.adr, r1.end_row_ix
1896 # ) then
1897 # That was the last word?
1898 # We are done sending.
1899 # Clear stb and set stbs_done
1900 # so we can handle an eventual
1901 # last ack on the same cycle.
1902 with m.If(is_last_row_addr(
1903 r1.wb.adr, r1.end_row_ix)):
1904 # r1.wb.stb <= '0';
1905 # stbs_done := true;
1906 sync += r1.wb.stb.eq(0)
1907 sync += stbs_done.eq(1)
1908 # end if;
1909
1910 # -- Calculate the next row address
1911 # r1.wb.adr <= next_row_addr(r1.wb.adr);
1912 # Calculate the next row address
1913 sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr))
1914 # end if;
1915
1916 # -- Incoming acks processing
1917 # r1.forward_valid1 <= wishbone_in.ack;
1918 # Incoming acks processing
1919 sync += r1.forward_valid1.eq(wb_in.ack)
1920
1921 # if wishbone_in.ack = '1' then
1922 with m.If(wb_in.ack):
1923 # r1.rows_valid(
1924 # r1.store_row mod ROW_PER_LINE
1925 # ) <= '1';
1926 sync += r1.rows_valid[
1927 r1.store_row % ROW_PER_LINE
1928 ].eq(1)
1929
1930 # -- If this is the data we were looking for,
1931 # -- we can complete the request next cycle.
1932 # -- Compare the whole address in case the
1933 # -- request in r1.req is not the one that
1934 # -- started this refill.
1935 # if r1.full = '1' and r1.req.same_tag = '1'
1936 # and ((r1.dcbz = '1' and r1.req.dcbz = '1')
1937 # or (r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS))
1938 # and r1.store_row = get_row(r1.req.real_addr) then
1939 # If this is the data we were looking for,
1940 # we can complete the request next cycle.
1941 # Compare the whole address in case the
1942 # request in r1.req is not the one that
1943 # started this refill.
1944 with m.If(r1.full & r1.req.same_tag &
1945 ((r1.dcbz & r1.req.dcbz) |
1946 (~r1.dcbz &
1947 (r1.req.op == Op.OP_LOAD_MISS))) &
1948 (r1.store_row ==
1949 get_row(r1.req.real_addr))):
1951 # r1.full <= '0';
1952 # r1.slow_valid <= '1';
1953 sync += r1.full.eq(0)
1954 sync += r1.slow_valid.eq(1)
1955
1956 # if r1.mmu_req = '0' then
1957 with m.If(~r1.mmu_req):
1958 # r1.ls_valid <= '1';
1959 sync += r1.ls_valid.eq(1)
1960 # else
1961 with m.Else():
1962 # r1.mmu_done <= '1';
1963 sync += r1.mmu_done.eq(1)
1964 # end if;
1965 # r1.forward_sel <= (others => '1');
1966 # r1.use_forward1 <= '1';
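# forward the just-acked wishbone data into the
# load result next cycle, rather than waiting
# for the BRAM write to become readable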
1967 sync += r1.forward_sel.eq(Repl(1, len(r1.forward_sel)))
1968 sync += r1.use_forward1.eq(1)
1969 # end if;
1970
1971 # -- Check for completion
1972 # if stbs_done and is_last_row(r1.store_row,
1973 # r1.end_row_ix) then
1974 # Check for completion
1975 with m.If(stbs_done &
1976 is_last_row(r1.store_row,
1977 r1.end_row_ix)):
1978
1979 # -- Complete wishbone cycle
1980 # r1.wb.cyc <= '0';
1981 # Complete wishbone cycle
1982 sync += r1.wb.cyc.eq(0)
1983
1984 # -- Cache line is now valid
1985 # cache_valids(r1.store_index)(
1986 # r1.store_way
1987 # ) <= '1';
1988 # Cache line is now valid
1989 sync += cache_valid_bits[
1990 r1.store_index
1991 ][r1.store_way].eq(1)
1992
1993 # r1.state <= IDLE;
1994 sync += r1.state.eq(State.IDLE)
1995 # end if;
1996
1997 # -- Increment store row counter
1998 # r1.store_row <= next_row(r1.store_row);
1999 # Increment store row counter
2000 sync += r1.store_row.eq(next_row(
2001 r1.store_row
2002 ))
2003 # end if;
2004
2005 # when STORE_WAIT_ACK =>
2006 with m.Case(State.STORE_WAIT_ACK):
2007 # stbs_done := r1.wb.stb = '0';
2008 # acks := r1.acks_pending;
2009 sync += stbs_done.eq(~r1.wb.stb)
2010 sync += acks.eq(r1.acks_pending)
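# note: stbs_done and acks are VHDL *variables*
# (updated immediately within the process); the
# sync assignments above model them one cycle
# late, so this spot likely needs a comb rework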
2011
2012 # if r1.inc_acks /= r1.dec_acks then
2013 with m.If(r1.inc_acks != r1.dec_acks):
2014
2015 # if r1.inc_acks = '1' then
2016 with m.If(r1.inc_acks):
2017 # acks := acks + 1;
2018 sync += acks.eq(acks + 1)
2019
2020 # else
2021 with m.Else():
2022 # acks := acks - 1;
2023 sync += acks.eq(acks - 1)
2024 # end if;
2025 # end if;
2026
2027 # r1.acks_pending <= acks;
2028 sync += r1.acks_pending.eq(acks)
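# acks_pending counts stores issued but not yet
# acked: inc_acks pulses as each new stb goes
# out, dec_acks as each ack returns, and the
# cycle closes once stbs_done and acks == 1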
2029
2030 # -- Clear stb when slave accepted request
2031 # if wishbone_in.stall = '0' then
2032 # Clear stb when slave accepted request
2033 with m.If(~wb_in.stall):
2034 # -- See if there is another store waiting
2035 # -- to be done which is in the same real page.
2036 # if req.valid = '1' then
2037 # See if there is another store waiting
2038 # to be done which is in the same real page.
2039 with m.If(req.valid):
2040 # r1.wb.adr(
2041 # SET_SIZE_BITS - 1 downto 0
2042 # ) <= req.real_addr(
2043 # SET_SIZE_BITS - 1 downto 0
2044 # );
2045 # r1.wb.dat <= req.data;
2046 # r1.wb.sel <= req.byte_sel;
2047 sync += r1.wb.adr[0:SET_SIZE_BITS].eq(
2048 req.real_addr[0:SET_SIZE_BITS])
2049 sync += r1.wb.dat.eq(req.data)
sync += r1.wb.sel.eq(req.byte_sel)
2050 # end if;
2051
2052 # if acks < 7 and req.same_tag = '1'
2053 # and (req.op = OP_STORE_MISS
2054 # or req.op = OP_STORE_HIT) then
2055 with m.If((acks < 7) & req.same_tag &
2056 ((req.op == Op.OP_STORE_MISS)
2057 | (req.op == Op.OP_STORE_HIT))):
2058 # r1.wb.stb <= '1';
2059 # stbs_done := false;
2060 sync += r1.wb.stb.eq(1)
2061 sync += stbs_done.eq(0)
2062
2063 # if req.op = OP_STORE_HIT then
2064 with m.If(req.op == Op.OP_STORE_HIT):
2065 # r1.write_bram <= '1';
2066 sync += r1.write_bram.eq(1)
2067 # end if;
2068 # r1.full <= '0';
2069 # r1.slow_valid <= '1';
2070 sync += r1.full.eq(0)
2071 sync += r1.slow_valid.eq(1)
2072
2073 # -- Store requests never come from the MMU
2074 # r1.ls_valid <= '1';
2075 # stbs_done := false;
2076 # r1.inc_acks <= '1';
2077 # Store requests never come from the MMU
2078 sync += r1.ls_valid.eq(1)
2079 sync += stbs_done.eq(0)
2080 sync += r1.inc_acks.eq(1)
2081 # else
2082 with m.Else():
2083 # r1.wb.stb <= '0';
2084 # stbs_done := true;
2085 sync += r1.wb.stb.eq(0)
2086 sync += stbs_done.eq(1)
2087 # end if;
2088 # end if;
2089
2090 # -- Got ack ? See if complete.
2091 # if wishbone_in.ack = '1' then
2092 # Got ack ? See if complete.
2093 with m.If(wb_in.ack):
2094 # if stbs_done and acks = 1 then
2095 with m.If(stbs_done & (acks == 1)):
2096 # r1.state <= IDLE;
2097 # r1.wb.cyc <= '0';
2098 # r1.wb.stb <= '0';
2099 sync += r1.state.eq(State.IDLE)
2100 sync += r1.wb.cyc.eq(0)
2101 sync += r1.wb.stb.eq(0)
2102 # end if;
2103 # r1.dec_acks <= '1';
2104 sync += r1.dec_acks.eq(1)
2105 # end if;
2106
2107 # when NC_LOAD_WAIT_ACK =>
2108 with m.Case(State.NC_LOAD_WAIT_ACK):
2109 # -- Clear stb when slave accepted request
2110 # if wishbone_in.stall = '0' then
2111 # Clear stb when slave accepted request
2112 with m.If(~wb_in.stall):
2113 # r1.wb.stb <= '0';
2114 sync += r1.wb.stb.eq(0)
2115 # end if;
2116
2117 # -- Got ack ? complete.
2118 # if wishbone_in.ack = '1' then
2119 # Got ack ? complete.
2120 with m.If(wb_in.ack):
2121 # r1.state <= IDLE;
2122 # r1.full <= '0';
2123 # r1.slow_valid <= '1';
2124 sync += r1.state.eq(State.IDLE)
2125 sync += r1.full.eq(0)
2126 sync += r1.slow_valid.eq(1)
2127
2128 # if r1.mmu_req = '0' then
2129 with m.If(~r1.mmu_req):
2130 # r1.ls_valid <= '1';
2131 sync += r1.ls_valid.eq(1)
2132
2133 # else
2134 with m.Else():
2135 # r1.mmu_done <= '1';
2136 sync += r1.mmu_done.eq(1)
2137 # end if;
2138
2139 # r1.forward_sel <= (others => '1');
2140 # r1.use_forward1 <= '1';
2141 # r1.wb.cyc <= '0';
2142 # r1.wb.stb <= '0';
2143 sync += r1.forward_sel.eq(Repl(1, len(r1.forward_sel)))
2144 sync += r1.use_forward1.eq(1)
2145 sync += r1.wb.cyc.eq(0)
2146 sync += r1.wb.stb.eq(0)
2147 # end if;
2148 # end case;
2149 # end if;
2150 # end if;
2151 # end process;
2152
2153 # dc_log: if LOG_LENGTH > 0 generate
2154 # TODO learn how to translate vhdl generate into nmigen
2155 def dcache_log(self, m, r1, valid_ra, tlb_hit_way,
2156 stall_out, d_out, wb_in, log_out, req_op):
2157
2158 comb = m.d.comb
2159 sync = m.d.sync
2160
2161 # signal log_data : std_ulogic_vector(19 downto 0);
2162 log_data = Signal(20)
2163 
2165
2166 # begin
2167 # dcache_log: process(clk)
2168 # begin
2169 # if rising_edge(clk) then
2170 # log_data <= r1.wb.adr(5 downto 3) &
2171 # wishbone_in.stall &
2172 # wishbone_in.ack &
2173 # r1.wb.stb & r1.wb.cyc &
2174 # d_out.error &
2175 # d_out.valid &
2176 # std_ulogic_vector(
2177 # to_unsigned(op_t'pos(req_op), 3)) &
2178 # stall_out &
2179 # std_ulogic_vector(
2180 # to_unsigned(tlb_hit_way, 3)) &
2181 # valid_ra &
2182 # std_ulogic_vector(
2183 # to_unsigned(state_t'pos(r1.state), 3));
2184 sync += log_data.eq(Cat(
2185 r1.state[0:3], valid_ra, tlb_hit_way[0:3],
2186 stall_out, req_op[0:3], d_out.valid, d_out.error,
2187 r1.wb.cyc, r1.wb.stb, wb_in.ack, wb_in.stall,
2188 r1.wb.adr[3:6]
2189 ))
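# log_data layout, LSB first: [0:3] r1.state,
# [3] valid_ra, [4:7] tlb_hit_way, [7] stall_out,
# [8:11] req_op, [11] d_out.valid, [12] d_out.error,
# [13] wb.cyc, [14] wb.stb, [15] wb_in.ack,
# [16] wb_in.stall, [17:20] wb.adr[3:6]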
2190 # end if;
2191 # end process;
2192 # log_out <= log_data;
2193 # TODO check this against the vhdl generate semantics
2194 comb += log_out.eq(log_data)
2195 # end generate;
2196 # end;
2197
2198 def elaborate(self, platform):
2199 LINE_SIZE = self.LINE_SIZE
2200 NUM_LINES = self.NUM_LINES
2201 NUM_WAYS = self.NUM_WAYS
2202 TLB_SET_SIZE = self.TLB_SET_SIZE
2203 TLB_NUM_WAYS = self.TLB_NUM_WAYS
2204 TLB_LG_PGSZ = self.TLB_LG_PGSZ
2205 LOG_LENGTH = self.LOG_LENGTH
2206
2207 # BRAM organisation: We never access more than
2208 # wishbone_data_bits at a time so to save
2209 # resources we make the array only that wide, and
2210 # use consecutive indices to make a cache "line"
2211 #
2212 # ROW_SIZE is the width in bytes of the BRAM
2213 # (based on WB, so 64-bits)
2214 ROW_SIZE = WB_DATA_BITS // 8
2215
2216 # ROW_PER_LINE is the number of rows (wishbone
2217 # transactions) in a line
2218 ROW_PER_LINE = LINE_SIZE // ROW_SIZE
2219
2220 # BRAM_ROWS is the number of rows in BRAM needed
2221 # to represent the full dcache
2222 BRAM_ROWS = NUM_LINES * ROW_PER_LINE
2223
2224
2225 # Bit fields counts in the address
2226
2227 # REAL_ADDR_BITS is the number of real address
2228 # bits that we store
2229 REAL_ADDR_BITS = 56
2230
2231 # ROW_BITS is the number of bits to select a row
2232 ROW_BITS = log2_int(BRAM_ROWS)
2233
2234 # ROW_LINE_BITS is the number of bits to select
2235 # a row within a line
2236 ROW_LINE_BITS = log2_int(ROW_PER_LINE)
2237
2238 # LINE_OFF_BITS is the number of bits for
2239 # the offset in a cache line
2240 LINE_OFF_BITS = log2_int(LINE_SIZE)
2241
2242 # ROW_OFF_BITS is the number of bits for
2243 # the offset in a row
2244 ROW_OFF_BITS = log2_int(ROW_SIZE)
2245
2246 # INDEX_BITS is the number of bits to
2247 # select a cache line
2248 INDEX_BITS = log2_int(NUM_LINES)
2249
2250 # SET_SIZE_BITS is the log base 2 of the set size
2251 SET_SIZE_BITS = LINE_OFF_BITS + INDEX_BITS
2252
2253 # TAG_BITS is the number of bits of
2254 # the tag part of the address
2255 TAG_BITS = REAL_ADDR_BITS - SET_SIZE_BITS
2256
2257 # TAG_WIDTH is the width in bits of each way of the tag RAM
2258 TAG_WIDTH = TAG_BITS + 7 - ((TAG_BITS + 7) % 8)
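# i.e. TAG_BITS rounded up to a whole number of
# bytes (45 tag bits -> TAG_WIDTH of 48)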
2259
2260 # WAY_BITS is the number of bits to select a way
2261 WAY_BITS = log2_int(NUM_WAYS)
2262
2263 # Example of layout for 32 lines of 64 bytes:
2264 #
2265 # .. tag |index| line |
2266 # .. | row | |
2267 # .. | |---| | ROW_LINE_BITS (3)
2268 # .. | |--- - --| LINE_OFF_BITS (6)
2269 # .. | |- --| ROW_OFF_BITS (3)
2270 # .. |----- ---| | ROW_BITS (8)
2271 # .. |-----| | INDEX_BITS (5)
2272 # .. --------| | TAG_BITS (45)
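#
# Worked through for that layout (LINE_SIZE = 64,
# NUM_LINES = 32, 64-bit wishbone): ROW_SIZE = 8,
# ROW_PER_LINE = 8, BRAM_ROWS = 256, ROW_OFF_BITS = 3,
# ROW_LINE_BITS = 3, ROW_BITS = 8, LINE_OFF_BITS = 6,
# INDEX_BITS = 5, SET_SIZE_BITS = 11 and
# TAG_BITS = 56 - 11 = 45, matching the figure above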
2273
2274 TAG_RAM_WIDTH = TAG_WIDTH * NUM_WAYS
2275
2276 def CacheTagArray():
2277 return Array(CacheTagSet() for x in range(NUM_LINES))
2278
2279 def CacheValidBitsArray():
2280 return Array(CacheWayValidBits() for x in range(NUM_LINES))
2281
2282 def RowPerLineValidArray():
2283 return Array(Signal() for x in range(ROW_PER_LINE))
2284
2285 # Storage. Hopefully "cache_rows" is a BRAM, the rest is LUTs
2286 cache_tags = CacheTagArray()
2287 cache_tag_set = Signal(TAG_RAM_WIDTH)
2288 cache_valid_bits = CacheValidBitsArray()
2289
2290 # TODO attribute ram_style : string;
2291 # TODO attribute ram_style of cache_tags : signal is "distributed";
2292
2293 # L1 TLB
2294 TLB_SET_BITS = log2_int(TLB_SET_SIZE)
2295 TLB_WAY_BITS = log2_int(TLB_NUM_WAYS)
2296 TLB_EA_TAG_BITS = 64 - (TLB_LG_PGSZ + TLB_SET_BITS)
2297 TLB_TAG_WAY_BITS = TLB_NUM_WAYS * TLB_EA_TAG_BITS
2298 TLB_PTE_BITS = 64
2299 TLB_PTE_WAY_BITS = TLB_NUM_WAYS * TLB_PTE_BITS
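# e.g. taking TLB_LG_PGSZ = 12 (4 KiB pages),
# TLB_SET_SIZE = 64 and TLB_NUM_WAYS = 4 as an
# illustrative (assumed) configuration:
# TLB_SET_BITS = 6, TLB_EA_TAG_BITS = 64 - 18 = 46
# and TLB_TAG_WAY_BITS = 4 * 46 = 184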
2300
2301 def TLBValidBitsArray():
2302 return Array(
2303 Signal(TLB_NUM_WAYS) for x in range(TLB_SET_SIZE)
2304 )
2305
2306 def TLBTagsArray():
2307 return Array(
2308 Signal(TLB_TAG_WAY_BITS) for x in range (TLB_SET_SIZE)
2309 )
2310
2311 def TLBPtesArray():
2312 return Array(
2313 Signal(TLB_PTE_WAY_BITS) for x in range(TLB_SET_SIZE)
2314 )
2315
2316 def HitWaySet():
2317 return Array(Signal(NUM_WAYS) for x in range(TLB_NUM_WAYS))
2318
2319 """note: these are passed to nmigen.hdl.Memory as "attributes".
2320 don't know how, just that they are.
2321 """
2322 dtlb_valid_bits = TLBValidBitsArray()
2323 dtlb_tags = TLBTagsArray()
2324 dtlb_ptes = TLBPtesArray()
2325 # TODO attribute ram_style of
2326 # dtlb_tags : signal is "distributed";
2327 # TODO attribute ram_style of
2328 # dtlb_ptes : signal is "distributed";
2329
2330 r0 = RegStage0()
2331 r0_full = Signal()
2332
2333 r1 = RegStage1()
2334
2335 reservation = Reservation()
2336
2337 # Async signals on incoming request
2338 req_index = Signal(INDEX_BITS)
2339 req_row = Signal(ROW_BITS)
2340 req_hit_way = Signal(WAY_BITS)
2341 req_tag = Signal(TAG_BITS)
2342 req_op = Signal(Op)
2343 req_data = Signal(64)
2344 req_same_tag = Signal()
2345 req_go = Signal()
2346
2347 early_req_row = Signal(ROW_BITS)
2348
2349 cancel_store = Signal()
2350 set_rsrv = Signal()
2351 clear_rsrv = Signal()
2352
2353 r0_valid = Signal()
2354 r0_stall = Signal()
2355
2356 use_forward1_next = Signal()
2357 use_forward2_next = Signal()
2358
2359 # Cache RAM interface
2360 def CacheRamOut():
2361 return Array(Signal(WB_DATA_BITS) for x in range(NUM_WAYS))
2362
2363 cache_out = CacheRamOut()
2364
2365 # PLRU output interface
2366 def PLRUOut():
2367 return Array(Signal(WAY_BITS) for x in range(NUM_LINES))
2368
2369 plru_victim = PLRUOut()
2370 replace_way = Signal(WAY_BITS)
2371
2372 # Wishbone read/write/cache write formatting signals
2373 bus_sel = Signal(8)
2374
2375 # TLB signals
2376 tlb_tag_way = Signal(TLB_TAG_WAY_BITS)
2377 tlb_pte_way = Signal(TLB_PTE_WAY_BITS)
2378 tlb_valid_way = Signal(TLB_NUM_WAYS)
2379 tlb_req_index = Signal(TLB_SET_BITS)
2380 tlb_hit = Signal()
2381 tlb_hit_way = Signal(TLB_WAY_BITS)
2382 pte = Signal(TLB_PTE_BITS)
2383 ra = Signal(REAL_ADDR_BITS)
2384 valid_ra = Signal()
2385 perm_attr = PermAttr()
2386 rc_ok = Signal()
2387 perm_ok = Signal()
2388 access_ok = Signal()
2389
2390 # TLB PLRU output interface
2391 def TLBPLRUOut():
2392 return Array(
2393 Signal(TLB_WAY_BITS) for x in range(TLB_SET_SIZE)
2394 )
2395
2396 tlb_plru_victim = TLBPLRUOut()
2397
2398 # Helper functions to decode incoming requests
2399 #
2400 # Return the cache line index (tag index) for an address
2401 def get_index(addr):
2402 return addr[LINE_OFF_BITS:SET_SIZE_BITS]
2403
2404 # Return the cache row index (data memory) for an address
2405 def get_row(addr):
2406 return addr[ROW_OFF_BITS:SET_SIZE_BITS]
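# e.g. for the 32-line x 64-byte geometry above,
# get_index(addr) is addr[6:11] and
# get_row(addr) is addr[3:11]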
2407
2408 # Return the index of a row within a line
2409 def get_row_of_line(row):
2410 # the low ROW_LINE_BITS bits of the row index
2411 return row[0:ROW_LINE_BITS]
2413
2414 # Returns whether this is the last row of a line
2415 def is_last_row_addr(addr, last):
2416 return addr[ROW_OFF_BITS:LINE_OFF_BITS] == last
2417
2418 # Returns whether this is the last row of a line
2419 def is_last_row(row, last):
2420 return get_row_of_line(row) == last
2421
2422 # Return the address of the next row in the current cache line
2423 def next_row_addr(addr):
2424 # nmigen Values don't allow slice assignment,
2425 # so rebuild the address with Cat, putting a
2426 # ROW_LINE_BITS-wide adder on just the
2427 # row-index field (3 bits with defaults)
2428 row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] + 1
2429 return Cat(addr[0:ROW_OFF_BITS],
2430 row_idx[0:ROW_LINE_BITS],
2431 addr[LINE_OFF_BITS:])
2433
2434 # Return the next row in the current cache line. We use a
2435 # dedicated function in order to limit the size of the
2436 # generated adder to be only the bits within a cache line
2437 # (3 bits with default settings)
2438 def next_row(row):
2439 # as in next_row_addr, use Cat rather than
2440 # slice assignment; only the low
2441 # ROW_LINE_BITS bits feed the adder
2442 row_idx = row[0:ROW_LINE_BITS] + 1
2443 return Cat(row_idx[0:ROW_LINE_BITS],
2444 row[ROW_LINE_BITS:])
2447
2448 # Get the tag value from the address
2449 def get_tag(addr):
2450 return addr[SET_SIZE_BITS:REAL_ADDR_BITS]
2451
2452 # Read a tag from a tag memory row
2453 def read_tag(way, tagset):
2454 return tagset[way * TAG_WIDTH:way * TAG_WIDTH + TAG_BITS]
2455
2456 # Read a TLB tag from a TLB tag memory row
2457 def read_tlb_tag(way, tags):
2458 j = way * TLB_EA_TAG_BITS
2459 return tags[j:j + TLB_EA_TAG_BITS]
2462
2463 # Write a TLB tag to a TLB tag memory row;
2464 # returns an Assign which the caller adds to
2465 # the comb or sync domain
2466 def write_tlb_tag(way, tags, tag):
2467 j = way * TLB_EA_TAG_BITS
2468 return tags[j:j + TLB_EA_TAG_BITS].eq(tag)
2469
2470 # Read a PTE from a TLB PTE memory row
2471 def read_tlb_pte(way, ptes):
2472 j = way * TLB_PTE_BITS
2473 return ptes[j:j + TLB_PTE_BITS]
2474 
2475 # likewise returns an Assign for the caller
2476 def write_tlb_pte(way, ptes, newpte):
2477 j = way * TLB_PTE_BITS
2478 return ptes[j:j + TLB_PTE_BITS].eq(newpte)
2482
2483 assert (LINE_SIZE % ROW_SIZE) == 0, \
2484 "LINE_SIZE not multiple of ROW_SIZE"
2485 
2486 # note: '% 2 == 0' only tests evenness; a
2487 # power-of-2 test needs x & (x - 1) == 0
2488 assert (LINE_SIZE & (LINE_SIZE - 1)) == 0, \
2489 "LINE_SIZE not power of 2"
2490 
2491 assert (NUM_LINES & (NUM_LINES - 1)) == 0, \
2492 "NUM_LINES not power of 2"
2493 
2494 assert (ROW_PER_LINE & (ROW_PER_LINE - 1)) == 0, \
2495 "ROW_PER_LINE not power of 2"
2496 
2497 assert ROW_BITS == (INDEX_BITS + ROW_LINE_BITS), \
2498 "geometry bits don't add up"
2499 
2500 assert LINE_OFF_BITS == (ROW_OFF_BITS + ROW_LINE_BITS), \
2501 "geometry bits don't add up"
2502 
2503 assert REAL_ADDR_BITS == (TAG_BITS + INDEX_BITS
2504 + LINE_OFF_BITS), "geometry bits don't add up"
2505 
2506 assert REAL_ADDR_BITS == (TAG_BITS + ROW_BITS
2507 + ROW_OFF_BITS), "geometry bits don't add up"
2508 
2509 assert 64 == WB_DATA_BITS, "Can't yet handle a " \
2510 "wishbone width that isn't 64-bits"
2511 
2512 assert SET_SIZE_BITS <= TLB_LG_PGSZ, \
2513 "Set indexed by virtual address"
2510
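# module and clock-domain shorthands, needed by
# the comb/sync assignments below
m = Module()
comb = m.d.comb
sync = m.d.sync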
2511 # we don't yet handle collisions between loadstore1 requests
2512 # and MMU requests
2513 comb += m_out.stall.eq(0)
2514
2515 # Hold off the request in r0 when r1 has an uncompleted request
2516 comb += r0_stall.eq(r0_full & r1.full)
2517 comb += r0_valid.eq(r0_full & ~r1.full)
2518 comb += stall_out.eq(r0_stall)
2519
2520 # Wire up wishbone request latch out of stage 1
2521 comb += wishbone_out.eq(r1.wb)
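# an Elaboratable's elaborate() must hand the
# constructed Module back to the caller
return m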
2522
2523
2524
2525 # dcache_tb.vhdl
2526 #
2527 # entity dcache_tb is
2528 # end dcache_tb;
2529 #
2530 # architecture behave of dcache_tb is
2531 # signal clk : std_ulogic;
2532 # signal rst : std_ulogic;
2533 #
2534 # signal d_in : Loadstore1ToDcacheType;
2535 # signal d_out : DcacheToLoadstore1Type;
2536 #
2537 # signal m_in : MmuToDcacheType;
2538 # signal m_out : DcacheToMmuType;
2539 #
2540 # signal wb_bram_in : wishbone_master_out;
2541 # signal wb_bram_out : wishbone_slave_out;
2542 #
2543 # constant clk_period : time := 10 ns;
2544 # begin
2545 # dcache0: entity work.dcache
2546 # generic map(
2547 #
2548 # LINE_SIZE => 64,
2549 # NUM_LINES => 4
2550 # )
2551 # port map(
2552 # clk => clk,
2553 # rst => rst,
2554 # d_in => d_in,
2555 # d_out => d_out,
2556 # m_in => m_in,
2557 # m_out => m_out,
2558 # wishbone_out => wb_bram_in,
2559 # wishbone_in => wb_bram_out
2560 # );
2561 #
2562 # -- BRAM Memory slave
2563 # bram0: entity work.wishbone_bram_wrapper
2564 # generic map(
2565 # MEMORY_SIZE => 1024,
2566 # RAM_INIT_FILE => "icache_test.bin"
2567 # )
2568 # port map(
2569 # clk => clk,
2570 # rst => rst,
2571 # wishbone_in => wb_bram_in,
2572 # wishbone_out => wb_bram_out
2573 # );
2574 #
2575 # clk_process: process
2576 # begin
2577 # clk <= '0';
2578 # wait for clk_period/2;
2579 # clk <= '1';
2580 # wait for clk_period/2;
2581 # end process;
2582 #
2583 # rst_process: process
2584 # begin
2585 # rst <= '1';
2586 # wait for 2*clk_period;
2587 # rst <= '0';
2588 # wait;
2589 # end process;
2590 #
2591 # stim: process
2592 # begin
2593 # -- Clear stuff
2594 # d_in.valid <= '0';
2595 # d_in.load <= '0';
2596 # d_in.nc <= '0';
2597 # d_in.addr <= (others => '0');
2598 # d_in.data <= (others => '0');
2599 # m_in.valid <= '0';
2600 # m_in.addr <= (others => '0');
2601 # m_in.pte <= (others => '0');
2602 #
2603 # wait for 4*clk_period;
2604 # wait until rising_edge(clk);
2605 #
2606 # -- Cacheable read of address 4
2607 # d_in.load <= '1';
2608 # d_in.nc <= '0';
2609 # d_in.addr <= x"0000000000000004";
2610 # d_in.valid <= '1';
2611 # wait until rising_edge(clk);
2612 # d_in.valid <= '0';
2613 #
2614 # wait until rising_edge(clk) and d_out.valid = '1';
2615 # assert d_out.data = x"0000000100000000"
2616 # report "data @" & to_hstring(d_in.addr) &
2617 # "=" & to_hstring(d_out.data) &
2618 # " expected 0000000100000000"
2619 # severity failure;
2620 # -- wait for clk_period;
2621 #
2622 # -- Cacheable read of address 30
2623 # d_in.load <= '1';
2624 # d_in.nc <= '0';
2625 # d_in.addr <= x"0000000000000030";
2626 # d_in.valid <= '1';
2627 # wait until rising_edge(clk);
2628 # d_in.valid <= '0';
2629 #
2630 # wait until rising_edge(clk) and d_out.valid = '1';
2631 # assert d_out.data = x"0000000D0000000C"
2632 # report "data @" & to_hstring(d_in.addr) &
2633 # "=" & to_hstring(d_out.data) &
2634 # " expected 0000000D0000000C"
2635 # severity failure;
2636 #
2637 # -- Non-cacheable read of address 100
2638 # d_in.load <= '1';
2639 # d_in.nc <= '1';
2640 # d_in.addr <= x"0000000000000100";
2641 # d_in.valid <= '1';
2642 # wait until rising_edge(clk);
2643 # d_in.valid <= '0';
2644 # wait until rising_edge(clk) and d_out.valid = '1';
2645 # assert d_out.data = x"0000004100000040"
2646 # report "data @" & to_hstring(d_in.addr) &
2647 # "=" & to_hstring(d_out.data) &
2648 # " expected 0000004100000040"
2649 # severity failure;
2650 #
2651 # wait until rising_edge(clk);
2652 # wait until rising_edge(clk);
2653 # wait until rising_edge(clk);
2654 # wait until rising_edge(clk);
2655 #
2656 # std.env.finish;
2657 # end process;
2658 # end;
2659 def dcache_sim(dut):
2660 # clear stuff
2661 yield dut.d_in.valid.eq(0)
2662 yield dut.d_in.load.eq(0)
2663 yield dut.d_in.nc.eq(0)
2664 yield dut.d_in.addr.eq(0)
2665 yield dut.d_in.data.eq(0)
2666 yield dut.m_in.valid.eq(0)
2667 yield dut.m_in.addr.eq(0)
2668 yield dut.m_in.pte.eq(0)
2669 # wait 4 * clk_period
2670 yield
2671 yield
2672 yield
2673 yield
2674 # wait_until rising_edge(clk)
2675 yield
2676 # Cacheable read of address 4
2677 yield dut.d_in.load.eq(1)
2678 yield dut.d_in.nc.eq(0)
2679 yield dut.d_in.addr.eq(0x0000000000000004)
2680 yield dut.d_in.valid.eq(1)
2681 # wait-until rising_edge(clk)
2682 yield
2683 yield dut.d_in.valid.eq(0)
2684 yield
2685 while not (yield dut.d_out.valid):
2686 yield
2687 data = yield dut.d_out.data
2688 assert data == 0x0000000100000000, \
2689 f"data @4={data:x} expected 0000000100000000"
2690
2691
2692 # Cacheable read of address 30
2693 yield dut.d_in.load.eq(1)
2694 yield dut.d_in.nc.eq(0)
2695 yield dut.d_in.addr.eq(0x0000000000000030)
2696 yield dut.d_in.valid.eq(1)
2697 yield
2698 yield dut.d_in.valid.eq(0)
2699 yield
2700 while not (yield dut.d_out.valid):
2701 yield
2702 data = yield dut.d_out.data
2703 assert data == 0x0000000D0000000C, \
2704 f"data @30={data:x} expected 0000000D0000000C"
2705
2706 # Non-cacheable read of address 100
2707 yield dut.d_in.load.eq(1)
2708 yield dut.d_in.nc.eq(1)
2709 yield dut.d_in.addr.eq(0x0000000000000100)
2710 yield dut.d_in.valid.eq(1)
2711 yield
2712 yield dut.d_in.valid.eq(0)
2713 yield
2714 while not (yield dut.d_out.valid):
2715 yield
2716 data = yield dut.d_out.data
2717 assert data == 0x0000004100000040, \
2718 f"data @100={data:x} expected 0000004100000040"
2719
2720 yield
2721 yield
2722 yield
2723 yield
2724
2725
2726 def test_dcache():
2727 # local imports: rtlil is nmigen's RTLIL
2728 # backend; run_simulation is the migen-compat
2729 # simulation wrapper (assumed available)
2730 from nmigen.back import rtlil
2731 from nmigen.compat.sim import run_simulation
2732 
2733 dut = Dcache()
2734 vl = rtlil.convert(dut, ports=[])
2735 with open("test_dcache.il", "w") as f:
2736 f.write(vl)
2737 
2738 run_simulation(dut, dcache_sim(dut),
2739 vcd_name='test_dcache.vcd')
2740 
2741 if __name__ == '__main__':
2742 test_dcache()
2736