core: Implement big-endian mode

author Paul Mackerras <paulus@ozlabs.org>

Wed, 12 Aug 2020 11:59:28 +0000 (21:59 +1000)

committer Paul Mackerras <paulus@ozlabs.org>

Thu, 20 Aug 2020 08:17:25 +0000 (18:17 +1000)
author Paul Mackerras <paulus@ozlabs.org>
Wed, 12 Aug 2020 11:59:28 +0000 (21:59 +1000)
committer Paul Mackerras <paulus@ozlabs.org>
Thu, 20 Aug 2020 08:17:25 +0000 (18:17 +1000)
diff --git a/common.vhdl b/common.vhdl

index bd9210b1ba5ca41c21df8861497594b7c65a9ce7..9ed07b30a38ac29167e548537732f1c88aafb5c9 100644 (file)
--- a/common.vhdl
+++ b/common.vhdl
@@ -108,6 +108,7 @@ package common is
         req: std_ulogic;
          virt_mode : std_ulogic;
          priv_mode : std_ulogic;
+        big_endian : std_ulogic;
         stop_mark: std_ulogic;
          sequential: std_ulogic;
         nia: std_ulogic_vector(63 downto 0);
@@ -245,10 +246,12 @@ package common is
         redirect: std_ulogic;
          virt_mode: std_ulogic;
          priv_mode: std_ulogic;
+        big_endian: std_ulogic;
         redirect_nia: std_ulogic_vector(63 downto 0);
      end record;
      constant Execute1ToFetch1Init : Execute1ToFetch1Type := (redirect => '0', virt_mode => '0',
-                                                             priv_mode => '0', others => (others => '0'));
+                                                             priv_mode => '0', big_endian => '0',
+                                                             others => (others => '0'));
  
      type Execute1ToLoadstore1Type is record
         valid : std_ulogic;
diff --git a/execute1.vhdl b/execute1.vhdl

index 1b839978439624150d78f7cc9eb0d747c9c4900e..99553ccfddd480651430997a7e15c89612b5fb37 100644 (file)
--- a/execute1.vhdl
+++ b/execute1.vhdl
@@ -496,9 +496,10 @@ begin
         v.terminate := '0';
         icache_inval <= '0';
         v.busy := '0';
-        -- send MSR[IR] and ~MSR[PR] up to fetch1
+        -- send MSR[IR], ~MSR[PR] and ~MSR[LE] up to fetch1
          v.f.virt_mode := ctrl.msr(MSR_IR);
          v.f.priv_mode := not ctrl.msr(MSR_PR);
+        v.f.big_endian := not ctrl.msr(MSR_LE);
  
         -- Next insn adder used in a couple of places
         next_nia := std_ulogic_vector(unsigned(e_in.nia) + 4);
@@ -740,6 +741,7 @@ begin
             when OP_RFID =>
                  v.f.virt_mode := a_in(MSR_IR) or a_in(MSR_PR);
                  v.f.priv_mode := not a_in(MSR_PR);
+                v.f.big_endian := not a_in(MSR_LE);
                  -- Can't use msr_copy here because the partial function MSR
                  -- bits should be left unchanged, not zeroed.
                  ctrl_tmp.msr(63 downto 31) <= a_in(63 downto 31);
@@ -1161,6 +1163,8 @@ begin
              v.f.redirect := '1';
              v.f.virt_mode := '0';
              v.f.priv_mode := '1';
+            -- XXX interrupts are always taken little-endian for now; need an interrupt LE bit here, e.g. LPCR[ILE]
+            v.f.big_endian := '0';
          end if;
  
          if v.f.redirect = '1' then
@@ -1176,7 +1180,7 @@ begin
          lv.data := c_in;
          lv.write_reg := gspr_to_gpr(e_in.write_reg);
          lv.length := e_in.data_len;
-        lv.byte_reverse := e_in.byte_reverse;
+        lv.byte_reverse := e_in.byte_reverse xnor ctrl.msr(MSR_LE);
          lv.sign_extend := e_in.sign_extend;
          lv.update := e_in.update;
          lv.update_reg := gspr_to_gpr(e_in.read_reg1);
diff --git a/fetch1.vhdl b/fetch1.vhdl

index a56f33dad12e304b7c0dfa9da99cbbd822814a84..63672cbbce69c77665b468221de048f2eb5dd34c 100644 (file)
--- a/fetch1.vhdl
+++ b/fetch1.vhdl
@@ -50,8 +50,9 @@ begin
              log_nia <= r.nia(63) & r.nia(43 downto 2);
             if r /= r_next then
                 report "fetch1 rst:" & std_ulogic'image(rst) &
-                    " IR:" & std_ulogic'image(e_in.virt_mode) &
-                    " P:" & std_ulogic'image(e_in.priv_mode) &
+                    " IR:" & std_ulogic'image(r_next.virt_mode) &
+                    " P:" & std_ulogic'image(r_next.priv_mode) &
+                    " E:" & std_ulogic'image(r_next.big_endian) &
                     " R:" & std_ulogic'image(e_in.redirect) & std_ulogic'image(d_in.redirect) &
                     " S:" & std_ulogic'image(stall_in) &
                     " T:" & std_ulogic'image(stop_in) &
@@ -81,11 +82,13 @@ begin
             end if;
              v.virt_mode := '0';
              v.priv_mode := '1';
+            v.big_endian := '0';
             v_int.stop_state := RUNNING;
         elsif e_in.redirect = '1' then
             v.nia := e_in.redirect_nia(63 downto 2) & "00";
              v.virt_mode := e_in.virt_mode;
              v.priv_mode := e_in.priv_mode;
+            v.big_endian := e_in.big_endian;
          elsif d_in.redirect = '1' then
              v.nia := d_in.redirect_nia(63 downto 2) & "00";
         elsif stall_in = '0' then
diff --git a/icache.vhdl b/icache.vhdl

index 3f1c15ff14f0c37078a39030a55b95f4c56a4c22..d24a1466367b0bb8b4554e6e545bf59408a9a8e4 100644 (file)
--- a/icache.vhdl
+++ b/icache.vhdl
@@ -98,7 +98,8 @@ architecture rtl of icache is
      -- SET_SIZE_BITS is the log base 2 of the set size
      constant SET_SIZE_BITS : natural := LINE_OFF_BITS + INDEX_BITS;
      -- TAG_BITS is the number of bits of the tag part of the address
-    constant TAG_BITS      : natural := REAL_ADDR_BITS - SET_SIZE_BITS;
+    -- the +1 makes room for the endianness flag, which is stored as the top bit of the tag
+    constant TAG_BITS      : natural := REAL_ADDR_BITS - SET_SIZE_BITS + 1;
      -- WAY_BITS is the number of bits to select a way
      constant WAY_BITS     : natural := log2(NUM_WAYS);
  
@@ -289,9 +290,10 @@ architecture rtl of icache is
      end;
  
      -- Get the tag value from the address
-    function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0)) return cache_tag_t is
+    function get_tag(addr: std_ulogic_vector(REAL_ADDR_BITS - 1 downto 0);
+                     endian: std_ulogic) return cache_tag_t is
      begin
-        return addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
+        return endian & addr(REAL_ADDR_BITS - 1 downto SET_SIZE_BITS);
      end;
  
      -- Read a tag from a tag memory row
@@ -327,9 +329,9 @@ begin
         report "geometry bits don't add up" severity FAILURE;
      assert (LINE_OFF_BITS = ROW_OFF_BITS + ROW_LINEBITS)
         report "geometry bits don't add up" severity FAILURE;
-    assert (REAL_ADDR_BITS = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
+    assert (REAL_ADDR_BITS + 1 = TAG_BITS + INDEX_BITS + LINE_OFF_BITS)
         report "geometry bits don't add up" severity FAILURE;
-    assert (REAL_ADDR_BITS = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
+    assert (REAL_ADDR_BITS + 1 = TAG_BITS + ROW_BITS + ROW_OFF_BITS)
         report "geometry bits don't add up" severity FAILURE;
  
      sim_debug: if SIM generate
@@ -359,6 +361,7 @@ begin
         signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
         signal dout     : cache_row_t;
         signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
+        signal wr_dat   : std_ulogic_vector(wishbone_in.dat'left downto 0);
      begin
         way: entity work.cache_ram
             generic map (
@@ -372,10 +375,20 @@ begin
                 rd_data => dout,
                 wr_sel  => wr_sel,
                 wr_addr => wr_addr,
-               wr_data => wishbone_in.dat
+               wr_data => wr_dat
                 );
         process(all)
+            variable j: integer;
         begin
+            -- reverse the bytes within each 32-bit word of the read data if the line being stored is big endian (top tag bit set)
+            if r.store_tag(TAG_BITS - 1) = '0' then
+                wr_dat <= wishbone_in.dat;
+            else
+                for i in 0 to (wishbone_in.dat'length / 8) - 1 loop
+                    j := ((i / 4) * 4) + (3 - (i mod 4));
+                    wr_dat(i * 8 + 7 downto i * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
+                end loop;
+            end if;
             do_read <= not (stall_in or use_previous);
             do_write <= '0';
             if wishbone_in.ack = '1' and replace_way = i then
@@ -494,7 +507,7 @@ begin
         -- Extract line, row and tag from request
          req_index <= get_index(i_in.nia);
          req_row <= get_row(i_in.nia);
-        req_tag <= get_tag(real_addr);
+        req_tag <= get_tag(real_addr, i_in.big_endian);
  
         -- Calculate address of beginning of cache row, will be
         -- used for cache miss processing if needed
diff --git a/loadstore1.vhdl b/loadstore1.vhdl

index 62914c06fbc877dfd339d378f1aae484644a1ec0..123c8ad7c30a9082bde73212c74e3dc4062557de 100644 (file)
--- a/loadstore1.vhdl
+++ b/loadstore1.vhdl
@@ -201,14 +201,20 @@ begin
          end loop;
  
          -- Work out the sign bit for sign extension.
-        -- Assumes we are not doing both sign extension and byte reversal,
-        -- in that for unaligned loads crossing two dwords we end up
-        -- using a bit from the second dword, whereas for a byte-reversed
-        -- (i.e. big-endian) load the sign bit would be in the first dword.
-        negative := (r.length(3) and data_permuted(63)) or
-                    (r.length(2) and data_permuted(31)) or
-                    (r.length(1) and data_permuted(15)) or
-                    (r.length(0) and data_permuted(7));
+        -- For unaligned loads crossing two dwords, the sign bit is in the
+        -- first dword for big-endian (byte_reverse = 1), or the second dword
+        -- for little-endian.
+        if r.dwords_done = '1' and r.byte_reverse = '1' then
+            negative := (r.length(3) and r.load_data(63)) or
+                        (r.length(2) and r.load_data(31)) or
+                        (r.length(1) and r.load_data(15)) or
+                        (r.length(0) and r.load_data(7));
+        else
+            negative := (r.length(3) and data_permuted(63)) or
+                        (r.length(2) and data_permuted(31)) or
+                        (r.length(1) and data_permuted(15)) or
+                        (r.length(0) and data_permuted(7));
+        end if;
  
          -- trim and sign-extend
          for i in 0 to 7 loop
author	Paul Mackerras <paulus@ozlabs.org>
	Wed, 12 Aug 2020 11:59:28 +0000 (21:59 +1000)
committer	Paul Mackerras <paulus@ozlabs.org>
	Thu, 20 Aug 2020 08:17:25 +0000 (18:17 +1000)
common.vhdl		patch | blob | history
execute1.vhdl		patch | blob | history
fetch1.vhdl		patch | blob | history
icache.vhdl		patch | blob | history
loadstore1.vhdl		patch | blob | history