From: Paul Mackerras Date: Fri, 24 Apr 2020 00:58:56 +0000 (+1000) Subject: MMU: Implement reading of the process table X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=2843c99a71ad4b88d8d722bb7bae7d4979b6083c;p=microwatt.git MMU: Implement reading of the process table This adds the PID register (SPR 48) and repurposes SPR 720 as the PRTBL register, which points to the base of the process table. There doesn't seem to be any point in implementing the partition table, given that we don't have hypervisor mode. The MMU caches entry 0 of the process table internally (in pgtbl3) plus the entry indexed by the value in the PID register (pgtbl0). Both caches are invalidated by a tlbie[l] with RIC=2 or by a move to PRTBL. A move to PID invalidates only the pgtbl0 cache. The dTLB and iTLB are cleared by a move to either PRTBL or PID. Which of the two page table root pointers is used depends on the MSB of the address being translated: pgtbl0 when address(63) = 0, pgtbl3 when address(63) = 1. Since the segment checking ensures that address(63) = address(62), this is sufficient to map quadrants 0 and 3. 
Signed-off-by: Paul Mackerras --- diff --git a/common.vhdl b/common.vhdl index 02f0d3f..aaf176d 100644 --- a/common.vhdl +++ b/common.vhdl @@ -39,7 +39,8 @@ package common is constant SPR_SPRG3U : spr_num_t := 259; constant SPR_HSPRG0 : spr_num_t := 304; constant SPR_HSPRG1 : spr_num_t := 305; - constant SPR_PGTBL0 : spr_num_t := 720; + constant SPR_PID : spr_num_t := 48; + constant SPR_PRTBL : spr_num_t := 720; -- GPR indices in the register file (GPR only) subtype gpr_index_t is std_ulogic_vector(4 downto 0); @@ -288,7 +289,7 @@ package common is iside : std_ulogic; load : std_ulogic; priv : std_ulogic; - sprn : std_ulogic_vector(3 downto 0); + sprn : std_ulogic_vector(9 downto 0); addr : std_ulogic_vector(63 downto 0); rs : std_ulogic_vector(63 downto 0); end record; diff --git a/decode1.vhdl b/decode1.vhdl index cd17d1e..4cd195f 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -449,7 +449,7 @@ begin v.decode.sgl_pipe := '1'; -- send MMU-related SPRs to loadstore1 case sprn is - when SPR_DAR | SPR_DSISR | SPR_PGTBL0 => + when SPR_DAR | SPR_DSISR | SPR_PID | SPR_PRTBL => v.decode.unit := LDST; when others => end case; diff --git a/loadstore1.vhdl b/loadstore1.vhdl index 251f529..e71ad74 100644 --- a/loadstore1.vhdl +++ b/loadstore1.vhdl @@ -255,7 +255,7 @@ begin mfspr := '1'; -- partial decode on SPR number should be adequate given -- the restricted set that get sent down this path - if sprn(9) = '0' then + if sprn(9) = '0' and sprn(5) = '0' then if sprn(0) = '0' then sprval := x"00000000" & r.dsisr; else @@ -266,16 +266,18 @@ begin sprval := m_in.sprval; end if; when OP_MTSPR => - done := '1'; - if sprn(9) = '0' then + if sprn(9) = '0' and sprn(5) = '0' then if sprn(0) = '0' then v.dsisr := l_in.data(31 downto 0); else v.dar := l_in.data; end if; + done := '1'; else -- writing one of the SPRs in the MMU mmu_mtspr := '1'; + stall := '1'; + v.state := TLBIE_WAIT; end if; when OP_FETCH_FAILED => -- send it to the MMU to do the radix walk @@ -466,7 +468,7 @@ begin 
m_out.priv <= r.priv_mode; m_out.tlbie <= v.tlbie; m_out.mtspr <= mmu_mtspr; - m_out.sprn <= sprn(3 downto 0); + m_out.sprn <= sprn; m_out.addr <= addr; m_out.slbia <= l_in.insn(7); m_out.rs <= l_in.data; diff --git a/mmu.vhdl b/mmu.vhdl index 8415443..0eefbab 100644 --- a/mmu.vhdl +++ b/mmu.vhdl @@ -28,6 +28,8 @@ architecture behave of mmu is type state_t is (IDLE, TLB_WAIT, + PROC_TBL_READ, + PROC_TBL_WAIT, SEGMENT_CHECK, RADIX_LOOKUP, RADIX_READ_WAIT, @@ -42,9 +44,15 @@ architecture behave of mmu is store : std_ulogic; priv : std_ulogic; addr : std_ulogic_vector(63 downto 0); + -- config SPRs + prtbl : std_ulogic_vector(63 downto 0); + pid : std_ulogic_vector(31 downto 0); -- internal state state : state_t; pgtbl0 : std_ulogic_vector(63 downto 0); + pt0_valid : std_ulogic; + pgtbl3 : std_ulogic_vector(63 downto 0); + pt3_valid : std_ulogic; shift : unsigned(5 downto 0); mask_size : unsigned(4 downto 0); pgbase : std_ulogic_vector(55 downto 0); @@ -64,8 +72,8 @@ architecture behave of mmu is begin -- Multiplex internal SPR values back to loadstore1, selected - -- by l_in.sprn. Easy when there's only one... - l_out.sprval <= r.pgtbl0; + -- by l_in.sprn. 
+ l_out.sprval <= r.prtbl when l_in.sprn(9) = '1' else x"00000000" & r.pid; mmu_0: process(clk) begin @@ -73,7 +81,9 @@ begin if rst = '1' then r.state <= IDLE; r.valid <= '0'; - r.pgtbl0 <= (others => '0'); + r.pt0_valid <= '0'; + r.pt3_valid <= '0'; + r.prtbl <= (others => '0'); else if rin.valid = '1' then report "MMU got tlb miss for " & to_hstring(rin.addr); @@ -169,12 +179,17 @@ begin variable itlb_load : std_ulogic; variable tlbie_req : std_ulogic; variable inval_all : std_ulogic; + variable prtbl_rd : std_ulogic; + variable pt_valid : std_ulogic; + variable effpid : std_ulogic_vector(31 downto 0); + variable prtable_addr : std_ulogic_vector(63 downto 0); variable rts : unsigned(5 downto 0); variable mbits : unsigned(5 downto 0); variable pgtable_addr : std_ulogic_vector(63 downto 0); variable pte : std_ulogic_vector(63 downto 0); variable tlb_data : std_ulogic_vector(63 downto 0); variable nonzero : std_ulogic; + variable pgtbl : std_ulogic_vector(63 downto 0); variable perm_ok : std_ulogic; variable rc_ok : std_ulogic; variable addr : std_ulogic_vector(63 downto 0); @@ -193,6 +208,7 @@ begin itlb_load := '0'; tlbie_req := '0'; inval_all := '0'; + prtbl_rd := '0'; -- Radix tree data structures in memory are big-endian, -- so we need to byte-swap them @@ -202,14 +218,21 @@ begin case r.state is when IDLE => + if l_in.addr(63) = '0' then + pgtbl := r.pgtbl0; + pt_valid := r.pt0_valid; + else + pgtbl := r.pgtbl3; + pt_valid := r.pt3_valid; + end if; -- rts == radix tree size, # address bits being translated - rts := unsigned('0' & r.pgtbl0(62 downto 61) & r.pgtbl0(7 downto 5)); + rts := unsigned('0' & pgtbl(62 downto 61) & pgtbl(7 downto 5)); -- mbits == # address bits to index top level of tree - mbits := unsigned('0' & r.pgtbl0(4 downto 0)); + mbits := unsigned('0' & pgtbl(4 downto 0)); -- set v.shift to rts so that we can use finalmask for the segment check v.shift := rts; v.mask_size := mbits(4 downto 0); - v.pgbase := r.pgtbl0(55 downto 8) & x"00"; + 
v.pgbase := pgtbl(55 downto 8) & x"00"; if l_in.valid = '1' then v.addr := l_in.addr; @@ -223,11 +246,23 @@ begin -- RB[IS] != 0 or RB[AP] != 0, or for slbia inval_all := l_in.slbia or l_in.addr(11) or l_in.addr(10) or l_in.addr(7) or l_in.addr(6) or l_in.addr(5); + -- The RIC field of the tlbie instruction comes across on the + -- sprn bus as bits 2--3. RIC=2 flushes process table caches. + if l_in.sprn(3) = '1' then + v.pt0_valid := '0'; + v.pt3_valid := '0'; + end if; v.state := TLB_WAIT; else v.valid := '1'; - -- Use RPDS = 0 to disable radix tree walks - if mbits = 0 then + if pt_valid = '0' then + -- need to fetch process table entry + -- set v.shift so we can use finalmask for generating + -- the process table entry address + v.shift := unsigned('0' & r.prtbl(4 downto 0)); + v.state := PROC_TBL_READ; + elsif mbits = 0 then + -- Use RPDS = 0 to disable radix tree walks v.state := RADIX_ERROR; v.invalid := '1'; else @@ -236,7 +271,20 @@ begin end if; end if; if l_in.mtspr = '1' then - v.pgtbl0 := l_in.rs; + -- Move to PID needs to invalidate L1 TLBs and cached + -- pgtbl0 value. Move to PRTBL does that plus + -- invalidating the cached pgtbl3 value as well. 
+ if l_in.sprn(9) = '0' then + v.pid := l_in.rs(31 downto 0); + else + v.prtbl := l_in.rs; + v.pt3_valid := '0'; + end if; + v.pt0_valid := '0'; + dcreq := '1'; + tlbie_req := '1'; + inval_all := '1'; + v.state := TLB_WAIT; end if; when TLB_WAIT => @@ -245,6 +293,41 @@ begin v.state := IDLE; end if; + when PROC_TBL_READ => + dcreq := '1'; + prtbl_rd := '1'; + v.state := PROC_TBL_WAIT; + + when PROC_TBL_WAIT => + if d_in.done = '1' then + if d_in.err = '0' then + if r.addr(63) = '1' then + v.pgtbl3 := data; + v.pt3_valid := '1'; + else + v.pgtbl0 := data; + v.pt0_valid := '1'; + end if; + -- rts == radix tree size, # address bits being translated + rts := unsigned('0' & data(62 downto 61) & data(7 downto 5)); + -- mbits == # address bits to index top level of tree + mbits := unsigned('0' & data(4 downto 0)); + -- set v.shift to rts so that we can use finalmask for the segment check + v.shift := rts; + v.mask_size := mbits(4 downto 0); + v.pgbase := data(55 downto 8) & x"00"; + if mbits = 0 then + v.state := RADIX_ERROR; + v.invalid := '1'; + else + v.state := SEGMENT_CHECK; + end if; + else + v.state := RADIX_ERROR; + v.badtree := '1'; + end if; + end if; + when SEGMENT_CHECK => mbits := '0' & r.mask_size; v.shift := r.shift + (31 - 12) - mbits; @@ -331,6 +414,16 @@ begin end case; + if r.addr(63) = '1' then + effpid := x"00000000"; + else + effpid := r.pid; + end if; + prtable_addr := x"00" & r.prtbl(55 downto 36) & + ((r.prtbl(35 downto 12) and not finalmask(23 downto 0)) or + (effpid(31 downto 8) and finalmask(23 downto 0))) & + effpid(7 downto 0) & "0000"; + pgtable_addr := x"00" & r.pgbase(55 downto 19) & ((r.pgbase(18 downto 3) and not mask) or (addrsh and mask)) & "000"; @@ -348,6 +441,9 @@ begin elsif tlb_load = '1' then addr := r.addr(63 downto 12) & x"000"; tlb_data := pte; + elsif prtbl_rd = '1' then + addr := prtable_addr; + tlb_data := (others => '0'); else addr := pgtable_addr; tlb_data := (others => '0'); diff --git a/tests/mmu/mmu.c b/tests/mmu/mmu.c 
index 8281b04..a5d086b 100644 --- a/tests/mmu/mmu.c +++ b/tests/mmu/mmu.c @@ -21,6 +21,8 @@ static inline void do_tlbie(unsigned long rb, unsigned long rs) #define DAR 19 #define SRR0 26 #define SRR1 27 +#define PID 48 +#define PRTBL 720 static inline unsigned long mfspr(int sprnum) { @@ -110,15 +112,20 @@ void zero_memory(void *ptr, unsigned long nbytes) * 8kB PGD level pointing to 4kB PTE pages. */ unsigned long *pgdir = (unsigned long *) 0x10000; -unsigned long free_ptr = 0x12000; +unsigned long *proc_tbl = (unsigned long *) 0x12000; +unsigned long free_ptr = 0x13000; void *eas_mapped[4]; int neas_mapped; void init_mmu(void) { + /* set up process table */ + zero_memory(proc_tbl, 512 * sizeof(unsigned long)); + mtspr(PRTBL, (unsigned long)proc_tbl); + mtspr(PID, 1); zero_memory(pgdir, 1024 * sizeof(unsigned long)); /* RTS = 0 (2GB address space), RPDS = 10 (1024-entry top level) */ - mtspr(720, (unsigned long) pgdir | 10); + store_pte(&proc_tbl[2 * 1], (unsigned long) pgdir | 10); do_tlbie(0xc00, 0); /* invalidate all TLB entries */ } diff --git a/tests/privileged/privileged.c b/tests/privileged/privileged.c index eca6e0e..98c037c 100644 --- a/tests/privileged/privileged.c +++ b/tests/privileged/privileged.c @@ -13,6 +13,8 @@ extern int call_with_msr(unsigned long arg, int (*fn)(unsigned long), unsigned l #define SRR0 26 #define SRR1 27 +#define PID 48 +#define PRTBL 720 static inline unsigned long mfspr(int sprnum) { @@ -55,11 +57,6 @@ void print_test_number(int i) putchar(':'); } -static inline void do_tlbie(unsigned long rb, unsigned long rs) -{ - __asm__ volatile("tlbie %0,%1" : : "r" (rb), "r" (rs) : "memory"); -} - static inline void store_pte(unsigned long *p, unsigned long pte) { __asm__ volatile("stdbrx %1,0,%0" : : "r" (p), "r" (pte) : "memory"); @@ -107,14 +104,18 @@ void zero_memory(void *ptr, unsigned long nbytes) * 8kB PGD level pointing to 4kB PTE pages. 
*/ unsigned long *pgdir = (unsigned long *) 0x10000; -unsigned long free_ptr = 0x12000; +unsigned long *proc_tbl = (unsigned long *) 0x12000; +unsigned long free_ptr = 0x13000; void init_mmu(void) { - zero_memory(pgdir, 1024 * sizeof(unsigned long)); + /* set up process table */ + zero_memory(proc_tbl, 512 * sizeof(unsigned long)); /* RTS = 0 (2GB address space), RPDS = 10 (1024-entry top level) */ - mtspr(720, (unsigned long) pgdir | 10); - do_tlbie(0xc00, 0); /* invalidate all TLB entries */ + store_pte(&proc_tbl[2 * 1], (unsigned long) pgdir | 10); + mtspr(PRTBL, (unsigned long)proc_tbl); + mtspr(PID, 1); + zero_memory(pgdir, 1024 * sizeof(unsigned long)); } static unsigned long *read_pgd(unsigned long i) diff --git a/tests/test_mmu.bin b/tests/test_mmu.bin index a1861b2..706f0d8 100755 Binary files a/tests/test_mmu.bin and b/tests/test_mmu.bin differ diff --git a/tests/test_privileged.bin b/tests/test_privileged.bin index 6eb6b53..340b7c0 100755 Binary files a/tests/test_privileged.bin and b/tests/test_privileged.bin differ