From: Paul Mackerras Date: Sun, 17 Jan 2021 21:55:56 +0000 (+1100) Subject: dcache: Fix bugs in pipelined operation X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=f636bb7c3999d9326a2bd1c6131fc128be2cae24;p=microwatt.git dcache: Fix bugs in pipelined operation This fixes two bugs which show up when multiple operations are in flight in the dcache, and adds a 'hold' input which will be needed when loadstore1 is pipelined. The first bug is that dcache needs to sample the data for a store on the cycle after the store request comes in even if the store request is held up because of a previous request (e.g. if the previous request is a load miss or a dcbz). The second bug is that a load request coming in for a cache line being refilled needs to be handled immediately in the case where it is for the row whose data arrives on the same cycle. If it is not, then it will be handled as a separate cache miss and the cache line will be refilled again into a different way, leading to two ways both being valid for the same tag. This can lead to data corruption, in the scenario where subsequent writes go to one of the ways and then that way gets displaced but the other way doesn't. This bug could in principle show up even without having multiple operations in flight in the dcache. Signed-off-by: Paul Mackerras --- diff --git a/common.vhdl b/common.vhdl index e79bcb5..35f782b 100644 --- a/common.vhdl +++ b/common.vhdl @@ -372,6 +372,7 @@ package common is type Loadstore1ToDcacheType is record valid : std_ulogic; + hold : std_ulogic; load : std_ulogic; -- is this a load dcbz : std_ulogic; nc : std_ulogic; diff --git a/dcache.vhdl b/dcache.vhdl index 7da67e1..bb93148 100644 --- a/dcache.vhdl +++ b/dcache.vhdl @@ -275,6 +275,7 @@ architecture rtl of dcache is doall : std_ulogic; -- with tlbie, indicates flush whole TLB tlbld : std_ulogic; -- indicates a TLB load request (from MMU) mmu_req : std_ulogic; -- indicates source of request + d_valid : std_ulogic; -- indicates req.data is valid now end record; signal r0 : reg_stage_0_t; @@ -564,17 +565,27 @@ begin r.mmu_req := '1'; else r.req := d_in; + r.req.data := (others => '0'); r.tlbie := '0'; r.doall := '0'; r.tlbld := '0'; r.mmu_req := '0'; end if; + r.d_valid := '0'; if rst = '1' then r0_full <= '0'; - elsif r1.full = '0' or r0_full = '0' then + elsif (r1.full = '0' and d_in.hold = '0') or r0_full = '0' then r0 <= r; r0_full <= r.req.valid; end if; + -- Sample data the cycle after a request comes in from loadstore1. + -- If another request has come in already then the data will get + -- put directly into req.data below. + if r0.req.valid = '1' and r.req.valid = '0' and r0.d_valid = '0' and + r0.mmu_req = '0' then + r0.req.data <= d_in.data; + r0.d_valid <= '1'; + end if; end if; end process; @@ -582,8 +593,8 @@ begin m_out.stall <= '0'; -- Hold off the request in r0 when r1 has an uncompleted request - r0_stall <= r0_full and r1.full; - r0_valid <= r0_full and not r1.full; + r0_stall <= r0_full and (r1.full or d_in.hold); + r0_valid <= r0_full and not r1.full and not d_in.hold; stall_out <= r0_stall; -- TLB @@ -1305,10 +1316,12 @@ begin req.dcbz := r0.req.dcbz; req.real_addr := ra; -- Force data to 0 for dcbz - if r0.req.dcbz = '0' then - req.data := d_in.data; - else + if r0.req.dcbz = '1' then req.data := (others => '0'); + elsif r0.d_valid = '1' then + req.data := r0.req.data; + else + req.data := d_in.data; end if; -- Select all bytes for dcbz and for cacheable loads if r0.req.dcbz = '1' or (r0.req.load = '1' and r0.req.nc = '0') then @@ -1438,10 +1451,10 @@ begin -- complete the request next cycle. -- Compare the whole address in case the request in -- r1.req is not the one that started this refill. - if r1.full = '1' and r1.req.same_tag = '1' and - ((r1.dcbz = '1' and r1.req.dcbz = '1') or - (r1.dcbz = '0' and r1.req.op = OP_LOAD_MISS)) and - r1.store_row = get_row(r1.req.real_addr) then + if req.valid = '1' and req.same_tag = '1' and + ((r1.dcbz = '1' and req.dcbz = '1') or + (r1.dcbz = '0' and req.op = OP_LOAD_MISS)) and + r1.store_row = get_row(req.real_addr) then r1.full <= '0'; r1.slow_valid <= '1'; if r1.mmu_req = '0' then diff --git a/loadstore1.vhdl b/loadstore1.vhdl index a754cc4..8e6c7be 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -745,6 +745,7 @@ begin d_out.byte_sel <= byte_sel; d_out.virt_mode <= v.virt_mode; d_out.priv_mode <= v.priv_mode; + d_out.hold <= '0'; -- Update outputs to MMU m_out.valid <= mmureq;