From: Paul Mackerras
Date: Wed, 6 May 2020 10:21:01 +0000 (+1000)
Subject: MMU: Refetch PTE on access fault
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=3eb07dc6370c3825394596657ac044c47f5b3cd2;p=microwatt.git

MMU: Refetch PTE on access fault

This is required by the architecture. It means that the error bits
reported in DSISR or SRR1 now come from the permission/RC check done on
the refetched PTE rather than on the TLB entry. Unfortunately, that
somewhat breaks the software-loaded TLB mode of operation, in that
DSISR/SRR1 always report "no PTE" rather than a permission error or an
RC failure.

This also restructures the loadstore1 state machine a bit, combining the
FIRST_ACK_WAIT and LAST_ACK_WAIT states into a single state, and likewise
the MMU_LOOKUP_1ST and MMU_LOOKUP_LAST states. We now have a
'dwords_done' bit to say whether the first transfer of two (for an
unaligned access) has been done.

The cache paradox error (where a non-cacheable access finds a hit in the
cache) is now the only cause of a DSI from the dcache. In fact, this
should probably be a machine check rather than a DSI.

Signed-off-by: Paul Mackerras --- diff --git a/common.vhdl b/common.vhdl index 07d1a36..424259b 100644 --- a/common.vhdl +++ b/common.vhdl @@ -263,26 +263,28 @@ package common is data : std_ulogic_vector(63 downto 0); store_done : std_ulogic; error : std_ulogic; - tlb_miss : std_ulogic; - perm_error : std_ulogic; - rc_error : std_ulogic; + cache_paradox : std_ulogic; end record; type Loadstore1ToMmuType is record valid : std_ulogic; tlbie : std_ulogic; mtspr : std_ulogic; + load : std_ulogic; + priv : std_ulogic; sprn : std_ulogic_vector(3 downto 0); addr : std_ulogic_vector(63 downto 0); rs : std_ulogic_vector(63 downto 0); end record; type MmuToLoadstore1Type is record - done : std_ulogic; - invalid : std_ulogic; - badtree : std_ulogic; - segerr : std_ulogic; - sprval : std_ulogic_vector(63 downto 0); + done : std_ulogic; + invalid : std_ulogic; + badtree : std_ulogic; + segerr : std_ulogic; + perm_error : std_ulogic; + rc_error : std_ulogic; + sprval : std_ulogic_vector(63 downto 0); end record; type MmuToDcacheType is record diff --git a/dcache.vhdl b/dcache.vhdl index 96563a5..ed593e8 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -179,6 +179,7 @@ architecture rtl of dcache is OP_LOAD_MISS, -- Load missing cache OP_LOAD_NC, -- Non-cachable load OP_BAD, -- BAD: Cache hit on NC load/store + OP_TLB_ERR, -- TLB miss or protection/RC failure OP_STORE_HIT, -- Store hitting cache OP_STORE_MISS); -- Store missing cache @@ -244,9 +245,7 @@ architecture rtl of dcache is -- Signals to complete with error error_done : std_ulogic; - tlb_miss : std_ulogic; -- No entry found in TLB - perm_error : std_ulogic; -- Permissions don't allow access - rc_error : std_ulogic; -- Reference or change bit clear + cache_paradox : std_ulogic; -- completion signal for tlbie tlbie_done : std_ulogic; @@ -758,7 +757,7 @@ begin when others => op := OP_NONE; end case; else - op := OP_BAD; + op := OP_TLB_ERR; end if; end if; req_op <= op; @@ -829,9 +828,7 @@ begin d_out.data <= 
cache_out(r1.hit_way); d_out.store_done <= '0'; d_out.error <= '0'; - d_out.tlb_miss <= '0'; - d_out.perm_error <= '0'; - d_out.rc_error <= '0'; + d_out.cache_paradox <= '0'; -- Outputs to MMU m_out.done <= r1.tlbie_done; @@ -868,9 +865,7 @@ begin if r1.error_done = '1' then report "completing ld/st with error"; d_out.error <= '1'; - d_out.tlb_miss <= r1.tlb_miss; - d_out.perm_error <= r1.perm_error; - d_out.rc_error <= r1.rc_error; + d_out.cache_paradox <= r1.cache_paradox; d_out.valid <= '1'; end if; @@ -1034,15 +1029,18 @@ begin r1.hit_load_valid <= '0'; end if; - if req_op = OP_BAD then + if req_op = OP_TLB_ERR then report "Signalling ld/st error valid_ra=" & std_ulogic'image(valid_ra) & " rc_ok=" & std_ulogic'image(rc_ok) & " perm_ok=" & std_ulogic'image(perm_ok); r1.error_done <= '1'; - r1.tlb_miss <= not valid_ra; - r1.perm_error <= valid_ra and not perm_ok; - r1.rc_error <= valid_ra and perm_ok and not rc_ok; + r1.cache_paradox <= '0'; + elsif req_op = OP_BAD then + report "Signalling cache paradox"; + r1.error_done <= '1'; + r1.cache_paradox <= '1'; else r1.error_done <= '0'; + r1.cache_paradox <= '0'; end if; -- complete tlbies and TLB loads in the third cycle @@ -1187,7 +1185,8 @@ begin -- OP_NONE and OP_BAD do nothing -- OP_BAD was handled above already when OP_NONE => - when OP_BAD => + when OP_BAD => + when OP_TLB_ERR => end case; when RELOAD_WAIT_ACK => diff --git a/loadstore1.vhdl b/loadstore1.vhdl index a29564b..c56346f 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -38,11 +38,10 @@ architecture behave of loadstore1 is -- State machine for unaligned loads/stores type state_t is (IDLE, -- ready for instruction SECOND_REQ, -- send 2nd request of unaligned xfer - FIRST_ACK_WAIT, -- waiting for 1st ack from dcache - LAST_ACK_WAIT, -- waiting for last ack from dcache + ACK_WAIT, -- waiting for ack from dcache LD_UPDATE, -- writing rA with computed addr on load - MMU_LOOKUP_1ST, -- waiting for MMU to look up translation - MMU_LOOKUP_LAST + 
MMU_LOOKUP, -- waiting for MMU to look up translation + TLBIE_WAIT -- waiting for MMU to finish doing a tlbie ); type reg_stage_t is record @@ -66,6 +65,7 @@ architecture behave of loadstore1 is virt_mode : std_ulogic; priv_mode : std_ulogic; state : state_t; + dwords_done : std_ulogic; first_bytes : std_ulogic_vector(7 downto 0); second_bytes : std_ulogic_vector(7 downto 0); dar : std_ulogic_vector(63 downto 0); @@ -230,6 +230,7 @@ begin v.load := '0'; v.dcbz := '0'; v.tlbie := '0'; + v.dwords_done := '0'; case l_in.op is when OP_STORE => req := '1'; @@ -241,7 +242,9 @@ begin v.dcbz := '1'; when OP_TLBIE => mmureq := '1'; + stall := '1'; v.tlbie := '1'; + v.state := TLBIE_WAIT; when OP_MFSPR => done := '1'; mfspr := '1'; @@ -318,15 +321,11 @@ begin if req = '1' then stall := '1'; if long_sel(15 downto 8) = "00000000" then - v.state := LAST_ACK_WAIT; + v.state := ACK_WAIT; else v.state := SECOND_REQ; end if; end if; - if mmureq = '1' then - stall := '1'; - v.state := LAST_ACK_WAIT; - end if; end if; when SECOND_REQ => @@ -334,37 +333,58 @@ begin byte_sel := r.second_bytes; req := '1'; stall := '1'; - v.state := FIRST_ACK_WAIT; + v.state := ACK_WAIT; - when FIRST_ACK_WAIT => + when ACK_WAIT => stall := '1'; if d_in.valid = '1' then if d_in.error = '1' then - -- dcache will discard the second request - addr := r.addr; - if d_in.tlb_miss = '1' then - -- give it to the MMU to look up - mmureq := '1'; - v.state := MMU_LOOKUP_1ST; + -- dcache will discard the second request if it + -- gets an error on the 1st of two requests + if r.dwords_done = '1' then + addr := next_addr; else + addr := r.addr; + end if; + if d_in.cache_paradox = '1' then -- signal an interrupt straight away exception := '1'; - dsisr(63 - 36) := d_in.perm_error; dsisr(63 - 38) := not r.load; - dsisr(63 - 45) := d_in.rc_error; + -- XXX there is no architected bit for this + dsisr(63 - 35) := d_in.cache_paradox; v.state := IDLE; + else + -- Look up the translation for TLB miss + -- and also for 
permission error and RC error + -- in case the PTE has been updated. + mmureq := '1'; + v.state := MMU_LOOKUP; end if; else - v.state := LAST_ACK_WAIT; - if r.load = '1' then - v.load_data := data_permuted; + if two_dwords = '1' and r.dwords_done = '0' then + v.dwords_done := '1'; + if r.load = '1' then + v.load_data := data_permuted; + end if; + else + write_enable := r.load; + if r.load = '1' and r.update = '1' then + -- loads with rA update need an extra cycle + v.state := LD_UPDATE; + else + -- stores write back rA update in this cycle + do_update := r.update; + stall := '0'; + done := '1'; + v.state := IDLE; + end if; end if; end if; end if; - when MMU_LOOKUP_1ST | MMU_LOOKUP_LAST => + when MMU_LOOKUP => stall := '1'; - if two_dwords = '1' and r.state = MMU_LOOKUP_LAST then + if r.dwords_done = '1' then addr := next_addr; byte_sel := r.second_bytes; else @@ -372,58 +392,28 @@ begin byte_sel := r.first_bytes; end if; if m_in.done = '1' then - if m_in.invalid = '0' and m_in.badtree = '0' and m_in.segerr = '0' then + if m_in.invalid = '0' and m_in.perm_error = '0' and m_in.rc_error = '0' and + m_in.badtree = '0' and m_in.segerr = '0' then -- retry the request now that the MMU has installed a TLB entry req := '1'; - if r.state = MMU_LOOKUP_1ST then + if two_dwords = '1' and r.dwords_done = '0' then v.state := SECOND_REQ; else - v.state := LAST_ACK_WAIT; + v.state := ACK_WAIT; end if; else exception := '1'; dsisr(63 - 33) := m_in.invalid; + dsisr(63 - 36) := m_in.perm_error; dsisr(63 - 38) := not r.load; dsisr(63 - 44) := m_in.badtree; + dsisr(63 - 45) := m_in.rc_error; v.state := IDLE; end if; end if; - when LAST_ACK_WAIT => + when TLBIE_WAIT => stall := '1'; - if d_in.valid = '1' then - if d_in.error = '1' then - if two_dwords = '1' then - addr := next_addr; - else - addr := r.addr; - end if; - if d_in.tlb_miss = '1' then - -- give it to the MMU to look up - mmureq := '1'; - v.state := MMU_LOOKUP_LAST; - else - -- signal an interrupt straight away - exception := 
'1'; - dsisr(63 - 36) := d_in.perm_error; - dsisr(63 - 38) := not r.load; - dsisr(63 - 45) := d_in.rc_error; - v.state := IDLE; - end if; - else - write_enable := r.load; - if r.load = '1' and r.update = '1' then - -- loads with rA update need an extra cycle - v.state := LD_UPDATE; - else - -- stores write back rA update in this cycle - do_update := r.update; - stall := '0'; - done := '1'; - v.state := IDLE; - end if; - end if; - end if; if m_in.done = '1' then -- tlbie is finished stall := '0'; @@ -451,6 +441,8 @@ begin -- Update outputs to MMU m_out.valid <= mmureq; + m_out.load <= r.load; + m_out.priv <= r.priv_mode; m_out.tlbie <= v.tlbie; m_out.mtspr <= mmu_mtspr; m_out.sprn <= sprn(3 downto 0); diff --git a/mmu.vhdl b/mmu.vhdl index 293b7a8..3a1003c 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -36,6 +36,8 @@ architecture behave of mmu is type reg_stage_t is record -- latched request from loadstore1 valid : std_ulogic; + store : std_ulogic; + priv : std_ulogic; addr : std_ulogic_vector(63 downto 0); -- internal state state : state_t; @@ -47,6 +49,8 @@ architecture behave of mmu is invalid : std_ulogic; badtree : std_ulogic; segerror : std_ulogic; + perm_err : std_ulogic; + rc_error : std_ulogic; end record; signal r, rin : reg_stage_t; @@ -166,6 +170,8 @@ begin variable pte : std_ulogic_vector(63 downto 0); variable data : std_ulogic_vector(63 downto 0); variable nonzero : std_ulogic; + variable perm_ok : std_ulogic; + variable rc_ok : std_ulogic; begin v := r; v.valid := '0'; @@ -174,6 +180,8 @@ begin v.invalid := '0'; v.badtree := '0'; v.segerror := '0'; + v.perm_err := '0'; + v.rc_error := '0'; tlb_load := '0'; tlbie_req := '0'; @@ -196,6 +204,8 @@ begin if l_in.valid = '1' then v.addr := l_in.addr; + v.store := not l_in.load; + v.priv := l_in.priv; if l_in.tlbie = '1' then dcreq := '1'; tlbie_req := '1'; @@ -247,7 +257,20 @@ begin if data(63) = '1' then -- test leaf bit if data(62) = '1' then - v.state := RADIX_LOAD_TLB; + -- check permissions and RC bits + 
perm_ok := '0'; + if r.priv = '1' or data(3) = '0' then + perm_ok := data(1) or (data(2) and not r.store); + end if; + rc_ok := data(8) and (data(7) or not r.store); + if perm_ok = '1' and rc_ok = '1' then + v.state := RADIX_LOAD_TLB; + else + v.state := RADIX_ERROR; + v.perm_err := not perm_ok; + -- permission error takes precedence over RC error + v.rc_error := perm_ok; + end if; else mbits := unsigned('0' & data(4 downto 0)); if mbits < 5 or mbits > 16 or mbits > r.shift then @@ -297,6 +320,8 @@ begin l_out.invalid <= r.invalid; l_out.badtree <= r.badtree; l_out.segerr <= r.segerror; + l_out.perm_error <= r.perm_err; + l_out.rc_error <= r.rc_error; d_out.valid <= dcreq; d_out.tlbie <= tlbie_req;