From 1aef49534f3686e129446a88e5e594dc85811a13 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 11 Sep 2020 00:38:22 +0100 Subject: [PATCH] eek, big sort-out of syntax errors in dcache.py, now generates .il --- src/soc/experiment/cache_ram.py | 7 +- src/soc/experiment/dcache.py | 147 ++++++++++++++++---------------- src/soc/experiment/plru.py | 5 +- 3 files changed, 82 insertions(+), 77 deletions(-) diff --git a/src/soc/experiment/cache_ram.py b/src/soc/experiment/cache_ram.py index a5d825f2..b2456fd8 100644 --- a/src/soc/experiment/cache_ram.py +++ b/src/soc/experiment/cache_ram.py @@ -17,6 +17,7 @@ class CacheRam(Elaboratable): def elaborate(self, platform): m = Module() + comb, sync = m.d.comb, m.d.sync ROW_BITS = self.ROW_BITS WIDTH = self.WIDTH @@ -41,12 +42,12 @@ class CacheRam(Elaboratable): lbit = i * 8; mbit = lbit + 8; with m.If(self.wr_sel[i]): - sync += ram[self.wr_addr][lbit:mbit].eq(wr_data[lbit:mbit]) + sync += ram[self.wr_addr][lbit:mbit].eq(self.wr_data[lbit:mbit]) with m.If(self.rd_en): if ADD_BUF: - sync += self.rd_data_o.eq(ram[rd_addr]) + sync += self.rd_data_o.eq(ram[self.rd_addr]) else: - comb += self.rd_data_o.eq(ram[rd_addr]) + comb += self.rd_data_o.eq(ram[self.rd_addr]) if TRACE: # Display( "read a:" & to_hstring(rd_addr) & diff --git a/src/soc/experiment/dcache.py b/src/soc/experiment/dcache.py index fb2cb3ba..4e8b5961 100644 --- a/src/soc/experiment/dcache.py +++ b/src/soc/experiment/dcache.py @@ -174,9 +174,7 @@ def get_row(addr): # Return the index of a row within a line def get_row_of_line(row): - row_v = Signal(ROW_BITS) - row_v = Signal(row) - return row_v[0:ROW_LINE_BITS] + return row[:ROW_LINE_BITS] # Returns whether this is the last row of a line def is_last_row_addr(addr, last): @@ -186,18 +184,6 @@ def is_last_row_addr(addr, last): def is_last_row(row, last): return get_row_of_line(row) == last -# Return the address of the next row in the current cache line -def next_row_addr(addr): - row_idx = Signal(ROW_LINE_BITS) - result = WBAddrType() - # Is there no simpler way in VHDL to - # generate that 3 bits adder ? - row_idx = addr[ROW_OFF_BITS:LINE_OFF_BITS] - row_idx = Signal(row_idx + 1) - result = addr - result[ROW_OFF_BITS:LINE_OFF_BITS] = row_idx - return result - # Return the next row in the current cache line. We use a # dedicated function in order to limit the size of the # generated adder to be only the bits within a cache line @@ -217,12 +203,11 @@ def read_tag(way, tagset): # Read a TLB tag from a TLB tag memory row def read_tlb_tag(way, tags): j = way * TLB_EA_TAG_BITS - return tags[j:j + TLB_EA_TAG_BITS] + return tags.bit_select(j, TLB_EA_TAG_BITS) # Write a TLB tag to a TLB tag memory row def write_tlb_tag(way, tags, tag): - j = way * TLB_EA_TAG_BITS - tags[j:j + TLB_EA_TAG_BITS] = tag + return read_tlb_tag(way, tags).eq(tag) # Read a PTE from a TLB PTE memory row def read_tlb_pte(way, ptes): @@ -230,8 +215,7 @@ def read_tlb_pte(way, ptes): return ptes.bit_select(j, TLB_PTE_BITS) def write_tlb_pte(way, ptes,newpte): - j = way * TLB_PTE_BITS - return ptes[j:j + TLB_PTE_BITS].eq(newpte) + return read_tlb_pte(way, ptes).eq(newpte) # Record for storing permission, attribute, etc. bits from a PTE @@ -565,8 +549,11 @@ class DCache(Elaboratable): tagset = Signal(TLB_TAG_WAY_BITS) pteset = Signal(TLB_PTE_WAY_BITS) + vb = Signal(TLB_NUM_WAYS) + comb += tlbie.eq(r0_valid & r0.tlbie) comb += tlbwe.eq(r0_valid & r0.tlbld) + sync += vb.eq(dtlb_valid_bits[tlb_req_index]) with m.If(tlbie & r0.doall): # clear all valid bits at once @@ -575,22 +562,22 @@ class DCache(Elaboratable): with m.Elif(tlbie): with m.If(tlb_hit): - sync += dtlb_valid_bits[tlb_req_index][tlb_hit_way].eq(0) + sync += vb.bit_select(tlb_hit_way, 1).eq(Const(0, 1)) with m.Elif(tlbwe): with m.If(tlb_hit): comb += repl_way.eq(tlb_hit_way) with m.Else(): comb += repl_way.eq(tlb_plru_victim[tlb_req_index]) comb += eatag.eq(r0.req.addr[TLB_LG_PGSZ + TLB_SET_BITS:64]) - comb += tagset.eq(tlb_tag_way) + sync += tagset.eq(tlb_tag_way) sync += write_tlb_tag(repl_way, tagset, eatag) sync += dtlb_tags[tlb_req_index].eq(tagset) - comb += pteset.eq(tlb_pte_way) + sync += pteset.eq(tlb_pte_way) sync += write_tlb_pte(repl_way, pteset, r0.req.data) sync += dtlb_ptes[tlb_req_index].eq(pteset) - sync += dtlb_valid_bits[tlb_req_index][repl_way].eq(1) + sync += vb.bit_select(repl_way, 1).eq(1) - def maybe_plrus(self, r1): + def maybe_plrus(self, m, r1, plru_victim): """Generate PLRUs """ comb = m.d.comb @@ -606,7 +593,7 @@ class DCache(Elaboratable): comb += plru.acc.eq(plru_acc) comb += plru.acc_en.eq(plru_acc_en) - comb += plru.lru.eq(plru_out) + comb += plru_out.eq(plru.lru_o) with m.If(r1.hit_index == i): comb += plru_acc_en.eq(r1.cache_hit) @@ -635,8 +622,10 @@ class DCache(Elaboratable): r0_valid, r1, cache_valid_bits, replace_way, use_forward1_next, use_forward2_next, req_hit_way, plru_victim, rc_ok, perm_attr, - valid_ra, perm_ok, access_ok, req_op, req_ok, - r0_stall, early_req_row): + valid_ra, perm_ok, access_ok, req_op, req_go, + tlb_pte_way, + tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set, + cancel_store, req_same_tag, r0_stall, early_req_row): """Cache request parsing and hit detection """ @@ -690,9 +679,9 @@ class DCache(Elaboratable): with m.If(s_tag == r1.reload_tag): comb += rel_matches[j].eq(1) with m.If(tlb_hit): - comb += is_hit.eq(hit_set[tlb_hit_way]) + comb += is_hit.eq(hit_set.bit_select(tlb_hit_way, 1)) comb += hit_way.eq(hit_way_set[tlb_hit_way]) - comb += rel_match.eq(rel_matches[tlb_hit_way]) + comb += rel_match.eq(rel_matches.bit_select(tlb_hit_way, 1)) with m.Else(): comb += s_tag.eq(get_tag(r0.req.addr)) for i in range(NUM_WAYS): @@ -745,7 +734,7 @@ class DCache(Elaboratable): comb += rc_ok.eq(perm_attr.reference & (r0.req.load | perm_attr.changed) ) - comb += perm_ok.eq((r0.req.prive_mode | ~perm_attr.priv) + comb += perm_ok.eq((r0.req.priv_mode | ~perm_attr.priv) & perm_attr.wr_perm | (r0.req.load & perm_attr.rd_perm) ) @@ -762,21 +751,21 @@ class DCache(Elaboratable): with m.Else(): comb += opsel.eq(Cat(is_hit, nc, r0.req.load)) with m.Switch(opsel): - with m.Case(Const(0b101, 3)): + with m.Case(0b101): comb += op.eq(Op.OP_LOAD_HIT) - with m.Case(Cosnt(0b100, 3)): + with m.Case(0b100): comb += op.eq(Op.OP_LOAD_MISS) - with m.Case(Const(0b110, 3)): + with m.Case(0b110): comb += op.eq(Op.OP_LOAD_NC) - with m.Case(Const(0b001, 3)): + with m.Case(0b001): comb += op.eq(Op.OP_STORE_HIT) - with m.Case(Const(0b000, 3)): + with m.Case(0b000): comb += op.eq(Op.OP_STORE_MISS) - with m.Case(Const(0b010, 3)): + with m.Case(0b010): comb += op.eq(Op.OP_STORE_MISS) - with m.Case(Const(0b011, 3)): + with m.Case(0b011): comb += op.eq(Op.OP_BAD) - with m.Case(Const(0b111, 3)): + with m.Case(0b111): comb += op.eq(Op.OP_BAD) with m.Default(): comb += op.eq(Op.OP_NONE) @@ -807,13 +796,13 @@ class DCache(Elaboratable): # XXX generate alignment interrupt if address # is not aligned XXX or if r0.req.nc = '1' with m.If(r0.req.load): - comb += set_rsrv(1) # load with reservation + comb += set_rsrv.eq(1) # load with reservation with m.Else(): comb += clear_rsrv.eq(1) # store conditional with m.If(~reservation.valid | r0.req.addr[LINE_OFF_BITS:64]): comb += cancel_store.eq(1) - def reservation_reg(self, m, r0_valid, access_ok, clear_rsrv, + def reservation_reg(self, m, r0_valid, access_ok, set_rsrv, clear_rsrv, reservation, r0): comb = m.d.comb @@ -882,7 +871,7 @@ class DCache(Elaboratable): assert ((r1.slow_valid | r1.stcx_fail) | r1.hit_load_valid) != 1, \ "unexpected hit_load_delayed collision with slow_valid" - with m.If(~r1._mmu_req): + with m.If(~r1.mmu_req): # Request came from loadstore1... # Load hit case is the standard path with m.If(r1.hit_load_valid): @@ -914,7 +903,7 @@ class DCache(Elaboratable): #Display("completing MMU load miss, data={m_out.data}") pass - def rams(self, m, r1): + def rams(self, m, r1, early_req_row, cache_out, replace_way): """rams Generate a cache RAM for each way. This handles the normal reads, writes from reloads and the special store-hit update @@ -943,7 +932,7 @@ class DCache(Elaboratable): comb += way.rd_en.eq(do_read) comb += way.rd_addr.eq(rd_addr) - comb += _d_out.eq(way.rd_data) + comb += _d_out.eq(way.rd_data_o) comb += way.wr_sel.eq(wr_sel_m) comb += way.wr_addr.eq(wr_addr) comb += way.wr_data.eq(wr_data) @@ -990,7 +979,9 @@ class DCache(Elaboratable): # Cache hit synchronous machine for the easy case. # This handles load hits. # It also handles error cases (TLB miss, cache paradox) - def dcache_fast_hit(self, m, req_op, r0_valid, r1): + def dcache_fast_hit(self, m, req_op, r0_valid, r0, r1, + req_hit_way, req_index, access_ok, + tlb_hit, tlb_hit_way, tlb_req_index): comb = m.d.comb sync = m.d.sync @@ -1049,16 +1040,19 @@ class DCache(Elaboratable): # # All wishbone requests generation is done here. # This machine operates at stage 1. - def dcache_slow(self, m, r1, use_forward1_next, cache_valid_bits, r0, + def dcache_slow(self, m, r1, use_forward1_next, use_forward2_next, + cache_valid_bits, r0, replace_way, + req_hit_way, req_same_tag, r0_valid, req_op, cache_tag, req_go, ra): comb = m.d.comb sync = m.d.sync - wb_in = self.wb_i + wb_in = self.wb_in req = MemAccessRequest() acks = Signal(3) adjust_acks = Signal(3) + stbs_done = Signal() sync += r1.use_forward1.eq(use_forward1_next) sync += r1.forward_sel.eq(0) @@ -1076,7 +1070,7 @@ class DCache(Elaboratable): sync += r1.forward_row1.eq(get_row(r1.req.real_addr)) sync += r1.forward_valid1.eq(1) with m.Else(): - with m.If(r1.bcbz): + with m.If(r1.dcbz): sync += r1.forward_data1.eq(0) with m.Else(): sync += r1.forward_data1.eq(wb_in.dat) @@ -1106,9 +1100,8 @@ class DCache(Elaboratable): # Store new tag in selected way for i in range(NUM_WAYS): with m.If(i == replace_way): - idx = r1.store_index - trange = range(i * TAG_WIDTH, (i+1) * TAG_WIDTH) - sync += cache_tag[idx][trange].eq(r1.reload_tag) + ct = cache_tag[r1.store_index].word_select(i, TAG_WIDTH) + sync += ct.eq(r1.reload_tag) sync += r1.store_way.eq(replace_way) sync += r1.write_tag.eq(0) @@ -1144,7 +1137,7 @@ class DCache(Elaboratable): | (req_op == Op.OP_LOAD_NC) | (req_op == Op.OP_STORE_MISS) | (req_op == Op.OP_STORE_HIT)): - sync += r1.req(req) + sync += r1.req.eq(req) sync += r1.full.eq(1) # Main state machine @@ -1155,7 +1148,7 @@ class DCache(Elaboratable): # r1.wb.adr <= req.real_addr( # r1.wb.adr'left downto 0 # ); - sync += r1.wb.adr.eq(req.real_addr[0:r1.wb.adr]) + sync += r1.wb.adr.eq(req.real_addr) sync += r1.wb.sel.eq(req.byte_sel) sync += r1.wb.dat.eq(req.data) sync += r1.dcbz.eq(req.dcbz) @@ -1206,7 +1199,7 @@ class DCache(Elaboratable): sync += r1.state.eq(State.NC_LOAD_WAIT_ACK) with m.Case(Op.OP_STORE_HIT, Op.OP_STORE_MISS): - with m.If(~req.bcbz): + with m.If(~req.dcbz): sync += r1.state.eq(State.STORE_WAIT_ACK) sync += r1.acks_pending.eq(1) sync += r1.full.eq(0) @@ -1220,7 +1213,7 @@ class DCache(Elaboratable): with m.If(req.op == Op.OP_STORE_HIT): sync += r1.write_bram.eq(1) with m.Else(): - sync += r1.state.eq(Op.RELOAD_WAIT_ACK) + sync += r1.state.eq(State.RELOAD_WAIT_ACK) with m.If(req.op == Op.OP_STORE_MISS): sync += r1.write_tag.eq(1) @@ -1234,9 +1227,9 @@ class DCache(Elaboratable): # handled above already with m.Case(Op.OP_NONE): pass - with m.Case(OP_BAD): + with m.Case(Op.OP_BAD): pass - with m.Case(OP_STCX_FAIL): + with m.Case(Op.OP_STCX_FAIL): pass with m.Case(State.RELOAD_WAIT_ACK): @@ -1254,8 +1247,9 @@ class DCache(Elaboratable): sync += r1.wb.stb.eq(0) comb += stbs_done.eq(0) - # Calculate the next row address - sync += r1.wb.adr.eq(next_row_addr(r1.wb.adr)) + # Calculate the next row address in the current cache line + rarange = r1.wb.adr[ROW_OFF_BITS : LINE_OFF_BITS] + sync += rarange.eq(rarange + 1) # Incoming acks processing sync += r1.forward_valid1.eq(wb_in.ack) @@ -1288,8 +1282,9 @@ class DCache(Elaboratable): sync += r1.wb.cyc.eq(0) # Cache line is now valid - cv = cache_valid_bits[r1.store_index] - sync += cv[r1.store_way].eq(1) + cv = Signal(INDEX_BITS) + sync += cv.eq(cache_valid_bits[r1.store_index]) + sync += cv.bit_select(r1.store_way, 1).eq(1) sync += r1.state.eq(State.IDLE) # Increment store row counter @@ -1320,8 +1315,8 @@ class DCache(Elaboratable): sync += r1.wb.sel.eq(req.byte_sel) with m.Elif((adjust_acks < 7) & req.same_tag & - ((req.op == Op.Op_STORE_MISS) - | (req.op == Op.OP_SOTRE_HIT))): + ((req.op == Op.OP_STORE_MISS) + | (req.op == Op.OP_STORE_HIT))): sync += r1.wb.stb.eq(1) comb += stbs_done.eq(0) @@ -1479,25 +1474,33 @@ class DCache(Elaboratable): self.tlb_update(m, r0_valid, r0, dtlb_valid_bits, tlb_req_index, tlb_hit_way, tlb_hit, tlb_plru_victim, tlb_tag_way, dtlb_tags, tlb_pte_way, dtlb_ptes) - self.maybe_plrus(r1) + self.maybe_plrus(m, r1, plru_victim) self.cache_tag_read(m, r0_stall, req_index, cache_tag_set, cache_tags) self.dcache_request(m, r0, ra, req_index, req_row, req_tag, r0_valid, r1, cache_valid_bits, replace_way, use_forward1_next, use_forward2_next, req_hit_way, plru_victim, rc_ok, perm_attr, - valid_ra, perm_ok, access_ok, req_op, req_ok, - r0_stall, early_req_row) + valid_ra, perm_ok, access_ok, req_op, req_go, + tlb_pte_way, + tlb_hit, tlb_hit_way, tlb_valid_way, cache_tag_set, + cancel_store, req_same_tag, r0_stall, early_req_row) self.reservation_comb(m, cancel_store, set_rsrv, clear_rsrv, r0_valid, r0, reservation) - self.reservation_reg(m, r0_valid, access_ok, clear_rsrv, + self.reservation_reg(m, r0_valid, access_ok, set_rsrv, clear_rsrv, reservation, r0) self.writeback_control(m, r1, cache_out) - self.rams(m, r1) - self.dcache_fast_hit(m, req_op, r0_valid, r1) - self.dcache_slow(m, r1, use_forward1_next, cache_valid_bits, r0, - r0_valid, req_op, cache_tag, req_go, ra) + self.rams(m, r1, early_req_row, cache_out, replace_way) + self.dcache_fast_hit(m, req_op, r0_valid, r0, r1, + req_hit_way, req_index, access_ok, + tlb_hit, tlb_hit_way, tlb_req_index) + self.dcache_slow(m, r1, use_forward1_next, use_forward2_next, + cache_valid_bits, r0, replace_way, + req_hit_way, req_same_tag, + r0_valid, req_op, cache_tags, req_go, ra) #self.dcache_log(m, r1, valid_ra, tlb_hit_way, stall_out) + return m + # dcache_tb.vhdl # @@ -1703,7 +1706,7 @@ def test_dcache(): with open("test_dcache.il", "w") as f: f.write(vl) - run_simulation(dut, dcache_sim(), vcd_name='test_dcache.vcd') + #run_simulation(dut, dcache_sim(), vcd_name='test_dcache.vcd') if __name__ == '__main__': test_dcache() diff --git a/src/soc/experiment/plru.py b/src/soc/experiment/plru.py index 51128c76..9699694f 100644 --- a/src/soc/experiment/plru.py +++ b/src/soc/experiment/plru.py @@ -12,6 +12,7 @@ class PLRU(Elaboratable): def elaborate(self, platform): m = Module() + comb, sync = m.d.comb, m.d.sync tree = Array(Signal() for i in range(self.BITS)) @@ -25,7 +26,7 @@ class PLRU(Elaboratable): # report "GET: i:" & integer'image(i) & " node:" & # integer'image(node) & " val:" & Signal()'image(tree(node)) comb += self.lru_o[self.BITS-1-i].eq(tree[node]) - if i != BITS-1: + if i != self.BITS-1: comb += node2.eq(node << 1) else: comb += node2.eq(node) @@ -44,7 +45,7 @@ class PLRU(Elaboratable): # integer'image(node) & " val:" & Signal()'image(tree(node)) abit = self.acc[self.BITS-1-i] sync += tree[node].eq(~abit) - if i != BITS-1: + if i != self.BITS-1: comb += node2.eq(node << 1) else: comb += node2.eq(node) -- 2.30.2