Pre-decode instructions when writing them to icache
author Paul Mackerras <paulus@ozlabs.org>
Fri, 29 Jul 2022 10:29:26 +0000 (20:29 +1000)
committer Paul Mackerras <paulus@ozlabs.org>
Tue, 9 Aug 2022 10:14:31 +0000 (20:14 +1000)
This splits out the decoding done in the decode0 step into a separate
predecoder, used when writing instructions into the icache.  The
icache now holds 36 bits per instruction rather than 32.  For valid
instructions, those 36 bits comprise the bottom 26 bits of the
instruction word, a 9-bit insn_code value (which uniquely identifies
the instruction), and a zero in the MSB.  For illegal instructions,
the MSB is one and the full instruction word is in the bottom 32 bits.
Having the full instruction word available for illegal instructions
means that it can be printed in the log when simulating, or in future
could be placed in the HEIR register.

If we don't have an FPU, then the floating-point instructions are
regarded as illegal.  In that case, the insn_code values would fit
into 8 bits, which could be used in future to reduce the size of
decode_rom from 512 to 256 entries.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
Makefile
cache_ram.vhdl
common.vhdl
core.vhdl
decode1.vhdl
icache.vhdl
microwatt.core
predecode.vhdl [new file with mode: 0644]

index 85a0fee7c3e1d468cf351f7311261c75bd4c7aec..ebb1b79b304887039ea20ab28c9d655fce02630d 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -56,7 +56,7 @@ all = core_tb icache_tb dcache_tb dmi_dtm_tb \
 all: $(all)
 
 core_files = decode_types.vhdl common.vhdl wishbone_types.vhdl fetch1.vhdl \
-       utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl \
+       utils.vhdl plru.vhdl cache_ram.vhdl icache.vhdl predecode.vhdl \
        decode1.vhdl helpers.vhdl insn_helpers.vhdl \
        control.vhdl decode2.vhdl register_file.vhdl \
        cr_file.vhdl crhelpers.vhdl ppc_fx_insns.vhdl rotator.vhdl \
index 8c8c495a4f6a430a8f30dc36dd6d55f25595297f..641917f41c587c4742f802bacb51ba4c181bd324 100644 (file)
--- a/cache_ram.vhdl
+++ b/cache_ram.vhdl
@@ -7,6 +7,7 @@ entity cache_ram is
     generic(
         ROW_BITS : integer := 16;
         WIDTH    : integer := 64;
+        BYTEWID  : integer := 8;
         TRACE    : boolean := false;
         ADD_BUF  : boolean := false
         );
@@ -16,7 +17,7 @@ entity cache_ram is
         rd_en   : in  std_logic;
         rd_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
         rd_data : out std_logic_vector(WIDTH - 1 downto 0);
-        wr_sel  : in  std_logic_vector(WIDTH/8 - 1 downto 0);
+        wr_sel  : in  std_logic_vector(WIDTH/BYTEWID - 1 downto 0);
         wr_addr : in  std_logic_vector(ROW_BITS - 1 downto 0);
         wr_data : in  std_logic_vector(WIDTH - 1 downto 0)
         );
@@ -38,7 +39,7 @@ begin
         variable lbit : integer range 0 to WIDTH - 1;
         variable mbit : integer range 0 to WIDTH - 1;
         variable widx : integer range 0 to SIZE - 1;
-        constant sel0 : std_logic_vector(WIDTH/8 - 1 downto 0)
+        constant sel0 : std_logic_vector(WIDTH/BYTEWID - 1 downto 0)
             := (others => '0');
     begin
         if rising_edge(clk) then
@@ -49,9 +50,9 @@ begin
                         " dat:" & to_hstring(wr_data);
                 end if;
             end if;
-            for i in 0 to WIDTH/8-1 loop
-                lbit := i * 8;
-                mbit := lbit + 7;
+            for i in 0 to WIDTH/BYTEWID-1 loop
+                lbit := i * BYTEWID;
+                mbit := lbit + BYTEWID - 1;
                 widx := to_integer(unsigned(wr_addr));
                 if wr_sel(i) = '1' then
                     ram(widx)(mbit downto lbit) <= wr_data(mbit downto lbit);
index a698ba97ba7ff1830bc35e9c63c12f6ed77d9df1..6287be58babb6af97714e4d48a5c09fedfdeca54 100644 (file)
--- a/common.vhdl
+++ b/common.vhdl
@@ -246,12 +246,13 @@ package common is
         fetch_failed: std_ulogic;
        nia: std_ulogic_vector(63 downto 0);
        insn: std_ulogic_vector(31 downto 0);
+        icode: insn_code;
         big_endian: std_ulogic;
         next_predicted: std_ulogic;
         next_pred_ntaken: std_ulogic;
     end record;
     constant IcacheToDecode1Init : IcacheToDecode1Type :=
-        (nia => (others => '0'), insn => (others => '0'), others => '0');
+        (nia => (others => '0'), insn => (others => '0'), icode => INSN_illegal, others => '0');
 
     type IcacheEventType is record
         icache_miss : std_ulogic;
index 764141a86701a1f5bd68bf19d76db8999527d763..06240003ec8e4aa5adf2838fd87c4f5329bd514e 100644 (file)
--- a/core.vhdl
+++ b/core.vhdl
@@ -246,6 +246,7 @@ begin
     icache_0: entity work.icache
         generic map(
             SIM => SIM,
+            HAS_FPU => HAS_FPU,
             LINE_SIZE => 64,
             NUM_LINES => ICACHE_NUM_LINES,
             NUM_WAYS => ICACHE_NUM_WAYS,
index d0179125a27278474d8345a50bcdf12d59dd8547..559a505f42d9fb27416424f81812eddb2845edde 100644 (file)
--- a/decode1.vhdl
+++ b/decode1.vhdl
@@ -31,22 +31,6 @@ entity decode1 is
 end entity decode1;
 
 architecture behaviour of decode1 is
-    type dc0_t is record
-        f_in     : IcacheToDecode1Type;
-        use_row  : std_ulogic;
-        br_pred  : std_ulogic;
-        override : std_ulogic;
-        ov_insn  : insn_code;
-        spr_info : spr_id;
-        ram_spr  : ram_spr_info;
-    end record;
-    constant dc0_t_init : dc0_t :=
-        (f_in => IcacheToDecode1Init, ov_insn => INSN_illegal,
-         spr_info => spr_id_init, ram_spr => ram_spr_info_init,
-         others => '0');
-
-    signal dc0, dc0in : dc0_t;
-
     signal r, rin : Decode1ToDecode2Type;
     signal f, fin : Decode1ToFetch1Type;
 
@@ -58,437 +42,10 @@ architecture behaviour of decode1 is
 
     signal br, br_in : br_predictor_t;
 
-    signal maj_rom_addr : std_ulogic_vector(10 downto 0);
-    signal row_rom_addr : std_ulogic_vector(10 downto 0);
-    signal major_predecode : insn_code;
-    signal row_predecode   : insn_code;
-
     signal decode_rom_addr : insn_code;
     signal decode : decode_rom_t;
-    signal rom_ce : std_ulogic;
-
-    type predecoder_rom_t is array(0 to 2047) of insn_code;
-
-    constant major_predecode_rom : predecoder_rom_t := (
-        2#001100_00000# to 2#001100_11111# =>  INSN_addic,
-        2#001101_00000# to 2#001101_11111# =>  INSN_addic_dot,
-        2#001110_00000# to 2#001110_11111# =>  INSN_addi,
-        2#001111_00000# to 2#001111_11111# =>  INSN_addis,
-        2#010011_00100# to 2#010011_00101# =>  INSN_addpcis,
-        2#011100_00000# to 2#011100_11111# =>  INSN_andi_dot,
-        2#011101_00000# to 2#011101_11111# =>  INSN_andis_dot,
-        2#000000_00000#                    =>  INSN_attn,
-        2#010010_00000# to 2#010010_11111# =>  INSN_b,
-        2#010000_00000# to 2#010000_11111# =>  INSN_bc,
-        2#001011_00000# to 2#001011_11111# =>  INSN_cmpi,
-        2#001010_00000# to 2#001010_11111# =>  INSN_cmpli,
-        2#100010_00000# to 2#100010_11111# =>  INSN_lbz,
-        2#100011_00000# to 2#100011_11111# =>  INSN_lbzu,
-        2#110010_00000# to 2#110010_11111# =>  INSN_lfd,
-        2#110011_00000# to 2#110011_11111# =>  INSN_lfdu,
-        2#110000_00000# to 2#110000_11111# =>  INSN_lfs,
-        2#110001_00000# to 2#110001_11111# =>  INSN_lfsu,
-        2#101010_00000# to 2#101010_11111# =>  INSN_lha,
-        2#101011_00000# to 2#101011_11111# =>  INSN_lhau,
-        2#101000_00000# to 2#101000_11111# =>  INSN_lhz,
-        2#101001_00000# to 2#101001_11111# =>  INSN_lhzu,
-        2#100000_00000# to 2#100000_11111# =>  INSN_lwz,
-        2#100001_00000# to 2#100001_11111# =>  INSN_lwzu,
-        2#000111_00000# to 2#000111_11111# =>  INSN_mulli,
-        2#011000_00000# to 2#011000_11111# =>  INSN_ori,
-        2#011001_00000# to 2#011001_11111# =>  INSN_oris,
-        2#010100_00000# to 2#010100_11111# =>  INSN_rlwimi,
-        2#010101_00000# to 2#010101_11111# =>  INSN_rlwinm,
-        2#010111_00000# to 2#010111_11111# =>  INSN_rlwnm,
-        2#010001_00000# to 2#010001_11111# =>  INSN_sc,
-        2#100110_00000# to 2#100110_11111# =>  INSN_stb,
-        2#100111_00000# to 2#100111_11111# =>  INSN_stbu,
-        2#110110_00000# to 2#110110_11111# =>  INSN_stfd,
-        2#110111_00000# to 2#110111_11111# =>  INSN_stfdu,
-        2#110100_00000# to 2#110100_11111# =>  INSN_stfs,
-        2#110101_00000# to 2#110101_11111# =>  INSN_stfsu,
-        2#101100_00000# to 2#101100_11111# =>  INSN_sth,
-        2#101101_00000# to 2#101101_11111# =>  INSN_sthu,
-        2#100100_00000# to 2#100100_11111# =>  INSN_stw,
-        2#100101_00000# to 2#100101_11111# =>  INSN_stwu,
-        2#001000_00000# to 2#001000_11111# =>  INSN_subfic,
-        2#000010_00000# to 2#000010_11111# =>  INSN_tdi,
-        2#000011_00000# to 2#000011_11111# =>  INSN_twi,
-        2#011010_00000# to 2#011010_11111# =>  INSN_xori,
-        2#011011_00000# to 2#011011_11111# =>  INSN_xoris,
-        -- major opcode 4
-        2#000100_10000#                    =>  INSN_maddhd,
-        2#000100_10001#                    =>  INSN_maddhdu,
-        2#000100_10011#                    =>  INSN_maddld,
-        -- major opcode 30
-        2#011110_01000# to 2#011110_01001# =>  INSN_rldic,
-        2#011110_01010# to 2#011110_01011# =>  INSN_rldic,
-        2#011110_00000# to 2#011110_00001# =>  INSN_rldicl,
-        2#011110_00010# to 2#011110_00011# =>  INSN_rldicl,
-        2#011110_00100# to 2#011110_00101# =>  INSN_rldicr,
-        2#011110_00110# to 2#011110_00111# =>  INSN_rldicr,
-        2#011110_01100# to 2#011110_01101# =>  INSN_rldimi,
-        2#011110_01110# to 2#011110_01111# =>  INSN_rldimi,
-        2#011110_10000# to 2#011110_10001# =>  INSN_rldcl,
-        2#011110_10010# to 2#011110_10011# =>  INSN_rldcr,
-        -- major opcode 58
-        2#111010_00000#                    =>  INSN_ld,
-        2#111010_00001#                    =>  INSN_ldu,
-        2#111010_00010#                    =>  INSN_lwa,
-        2#111010_00100#                    =>  INSN_ld,
-        2#111010_00101#                    =>  INSN_ldu,
-        2#111010_00110#                    =>  INSN_lwa,
-        2#111010_01000#                    =>  INSN_ld,
-        2#111010_01001#                    =>  INSN_ldu,
-        2#111010_01010#                    =>  INSN_lwa,
-        2#111010_01100#                    =>  INSN_ld,
-        2#111010_01101#                    =>  INSN_ldu,
-        2#111010_01110#                    =>  INSN_lwa,
-        2#111010_10000#                    =>  INSN_ld,
-        2#111010_10001#                    =>  INSN_ldu,
-        2#111010_10010#                    =>  INSN_lwa,
-        2#111010_10100#                    =>  INSN_ld,
-        2#111010_10101#                    =>  INSN_ldu,
-        2#111010_10110#                    =>  INSN_lwa,
-        2#111010_11000#                    =>  INSN_ld,
-        2#111010_11001#                    =>  INSN_ldu,
-        2#111010_11010#                    =>  INSN_lwa,
-        2#111010_11100#                    =>  INSN_ld,
-        2#111010_11101#                    =>  INSN_ldu,
-        2#111010_11110#                    =>  INSN_lwa,
-        -- major opcode 59
-        2#111011_00100# to 2#111011_00101# =>  INSN_fdivs,
-        2#111011_01000# to 2#111011_01001# =>  INSN_fsubs,
-        2#111011_01010# to 2#111011_01011# =>  INSN_fadds,
-        2#111011_01100# to 2#111011_01101# =>  INSN_fsqrts,
-        2#111011_10000# to 2#111011_10001# =>  INSN_fres,
-        2#111011_10010# to 2#111011_10011# =>  INSN_fmuls,
-        2#111011_10100# to 2#111011_10101# =>  INSN_frsqrtes,
-        2#111011_11000# to 2#111011_11001# =>  INSN_fmsubs,
-        2#111011_11010# to 2#111011_11011# =>  INSN_fmadds,
-        2#111011_11100# to 2#111011_11101# =>  INSN_fnmsubs,
-        2#111011_11110# to 2#111011_11111# =>  INSN_fnmadds,
-        -- major opcode 62
-        2#111110_00000#                    =>  INSN_std,
-        2#111110_00001#                    =>  INSN_stdu,
-        2#111110_00100#                    =>  INSN_std,
-        2#111110_00101#                    =>  INSN_stdu,
-        2#111110_01000#                    =>  INSN_std,
-        2#111110_01001#                    =>  INSN_stdu,
-        2#111110_01100#                    =>  INSN_std,
-        2#111110_01101#                    =>  INSN_stdu,
-        2#111110_10000#                    =>  INSN_std,
-        2#111110_10001#                    =>  INSN_stdu,
-        2#111110_10100#                    =>  INSN_std,
-        2#111110_10101#                    =>  INSN_stdu,
-        2#111110_11000#                    =>  INSN_std,
-        2#111110_11001#                    =>  INSN_stdu,
-        2#111110_11100#                    =>  INSN_std,
-        2#111110_11101#                    =>  INSN_stdu,
-        -- major opcode 63
-        2#111111_00100# to 2#111111_00101# =>  INSN_fdiv,
-        2#111111_01000# to 2#111111_01001# =>  INSN_fsub,
-        2#111111_01010# to 2#111111_01011# =>  INSN_fadd,
-        2#111111_01100# to 2#111111_01101# =>  INSN_fsqrt,
-        2#111111_01110# to 2#111111_01111# =>  INSN_fsel,
-        2#111111_10000# to 2#111111_10001# =>  INSN_fre,
-        2#111111_10010# to 2#111111_10011# =>  INSN_fmul,
-        2#111111_10100# to 2#111111_10101# =>  INSN_frsqrte,
-        2#111111_11000# to 2#111111_11001# =>  INSN_fmsub,
-        2#111111_11010# to 2#111111_11011# =>  INSN_fmadd,
-        2#111111_11100# to 2#111111_11101# =>  INSN_fnmsub,
-        2#111111_11110# to 2#111111_11111# =>  INSN_fnmadd,
-        others                             =>  INSN_illegal
-        );
 
-    constant row_predecode_rom : predecoder_rom_t := (
-        -- Major opcode 31
-        -- Address bits are 0, insn(10:1)
-        2#0_01000_01010#  =>  INSN_add,
-        2#0_11000_01010#  =>  INSN_add, -- addo
-        2#0_00000_01010#  =>  INSN_addc,
-        2#0_10000_01010#  =>  INSN_addc, -- addco
-        2#0_00100_01010#  =>  INSN_adde,
-        2#0_10100_01010#  =>  INSN_adde, -- addeo
-        2#0_00101_01010#  =>  INSN_addex,
-        2#0_00010_01010#  =>  INSN_addg6s,
-        2#0_00111_01010#  =>  INSN_addme,
-        2#0_10111_01010#  =>  INSN_addme, -- addmeo
-        2#0_00110_01010#  =>  INSN_addze,
-        2#0_10110_01010#  =>  INSN_addze, -- addzeo
-        2#0_00000_11100#  =>  INSN_and,
-        2#0_00001_11100#  =>  INSN_andc,
-        2#0_00111_11100#  =>  INSN_bperm,
-        2#0_01001_11010#  =>  INSN_cbcdtd,
-        2#0_01000_11010#  =>  INSN_cdtbcd,
-        2#0_00000_00000#  =>  INSN_cmp,
-        2#0_01111_11100#  =>  INSN_cmpb,
-        2#0_00111_00000#  =>  INSN_cmpeqb,
-        2#0_00001_00000#  =>  INSN_cmpl,
-        2#0_00110_00000#  =>  INSN_cmprb,
-        2#0_00001_11010#  =>  INSN_cntlzd,
-        2#0_00000_11010#  =>  INSN_cntlzw,
-        2#0_10001_11010#  =>  INSN_cnttzd,
-        2#0_10000_11010#  =>  INSN_cnttzw,
-        2#0_10111_10011#  =>  INSN_darn,
-        2#0_00010_10110#  =>  INSN_dcbf,
-        2#0_00001_10110#  =>  INSN_dcbst,
-        2#0_01000_10110#  =>  INSN_dcbt,
-        2#0_00111_10110#  =>  INSN_dcbtst,
-        2#0_11111_10110#  =>  INSN_dcbz,
-        2#0_01100_01001#  =>  INSN_divdeu,
-        2#0_11100_01001#  =>  INSN_divdeu, -- divdeuo
-        2#0_01100_01011#  =>  INSN_divweu,
-        2#0_11100_01011#  =>  INSN_divweu, -- divweuo
-        2#0_01101_01001#  =>  INSN_divde,
-        2#0_11101_01001#  =>  INSN_divde, -- divdeo
-        2#0_01101_01011#  =>  INSN_divwe,
-        2#0_11101_01011#  =>  INSN_divwe, -- divweo
-        2#0_01110_01001#  =>  INSN_divdu,
-        2#0_11110_01001#  =>  INSN_divdu, -- divduo
-        2#0_01110_01011#  =>  INSN_divwu,
-        2#0_11110_01011#  =>  INSN_divwu, -- divwuo
-        2#0_01111_01001#  =>  INSN_divd,
-        2#0_11111_01001#  =>  INSN_divd, -- divdo
-        2#0_01111_01011#  =>  INSN_divw,
-        2#0_11111_01011#  =>  INSN_divw, -- divwo
-        2#0_11001_10110#  =>  INSN_nop, -- dss
-        2#0_01010_10110#  =>  INSN_nop, -- dst
-        2#0_01011_10110#  =>  INSN_nop, -- dstst
-        2#0_11010_10110#  =>  INSN_eieio,
-        2#0_01000_11100#  =>  INSN_eqv,
-        2#0_11101_11010#  =>  INSN_extsb,
-        2#0_11100_11010#  =>  INSN_extsh,
-        2#0_11110_11010#  =>  INSN_extsw,
-        2#0_11011_11010#  =>  INSN_extswsli,
-        2#0_11011_11011#  =>  INSN_extswsli,
-        2#0_11110_10110#  =>  INSN_icbi,
-        2#0_00000_10110#  =>  INSN_icbt,
-        2#0_00000_01111#  =>  INSN_isel,
-        2#0_00001_01111#  =>  INSN_isel,
-        2#0_00010_01111#  =>  INSN_isel,
-        2#0_00011_01111#  =>  INSN_isel,
-        2#0_00100_01111#  =>  INSN_isel,
-        2#0_00101_01111#  =>  INSN_isel,
-        2#0_00110_01111#  =>  INSN_isel,
-        2#0_00111_01111#  =>  INSN_isel,
-        2#0_01000_01111#  =>  INSN_isel,
-        2#0_01001_01111#  =>  INSN_isel,
-        2#0_01010_01111#  =>  INSN_isel,
-        2#0_01011_01111#  =>  INSN_isel,
-        2#0_01100_01111#  =>  INSN_isel,
-        2#0_01101_01111#  =>  INSN_isel,
-        2#0_01110_01111#  =>  INSN_isel,
-        2#0_01111_01111#  =>  INSN_isel,
-        2#0_10000_01111#  =>  INSN_isel,
-        2#0_10001_01111#  =>  INSN_isel,
-        2#0_10010_01111#  =>  INSN_isel,
-        2#0_10011_01111#  =>  INSN_isel,
-        2#0_10100_01111#  =>  INSN_isel,
-        2#0_10101_01111#  =>  INSN_isel,
-        2#0_10110_01111#  =>  INSN_isel,
-        2#0_10111_01111#  =>  INSN_isel,
-        2#0_11000_01111#  =>  INSN_isel,
-        2#0_11001_01111#  =>  INSN_isel,
-        2#0_11010_01111#  =>  INSN_isel,
-        2#0_11011_01111#  =>  INSN_isel,
-        2#0_11100_01111#  =>  INSN_isel,
-        2#0_11101_01111#  =>  INSN_isel,
-        2#0_11110_01111#  =>  INSN_isel,
-        2#0_11111_01111#  =>  INSN_isel,
-        2#0_00001_10100#  =>  INSN_lbarx,
-        2#0_11010_10101#  =>  INSN_lbzcix,
-        2#0_00011_10111#  =>  INSN_lbzux,
-        2#0_00010_10111#  =>  INSN_lbzx,
-        2#0_00010_10100#  =>  INSN_ldarx,
-        2#0_10000_10100#  =>  INSN_ldbrx,
-        2#0_11011_10101#  =>  INSN_ldcix,
-        2#0_00001_10101#  =>  INSN_ldux,
-        2#0_00000_10101#  =>  INSN_ldx,
-        2#0_10010_10111#  =>  INSN_lfdx,
-        2#0_10011_10111#  =>  INSN_lfdux,
-        2#0_11010_10111#  =>  INSN_lfiwax,
-        2#0_11011_10111#  =>  INSN_lfiwzx,
-        2#0_10000_10111#  =>  INSN_lfsx,
-        2#0_10001_10111#  =>  INSN_lfsux,
-        2#0_00011_10100#  =>  INSN_lharx,
-        2#0_01011_10111#  =>  INSN_lhaux,
-        2#0_01010_10111#  =>  INSN_lhax,
-        2#0_11000_10110#  =>  INSN_lhbrx,
-        2#0_11001_10101#  =>  INSN_lhzcix,
-        2#0_01001_10111#  =>  INSN_lhzux,
-        2#0_01000_10111#  =>  INSN_lhzx,
-        2#0_00000_10100#  =>  INSN_lwarx,
-        2#0_01011_10101#  =>  INSN_lwaux,
-        2#0_01010_10101#  =>  INSN_lwax,
-        2#0_10000_10110#  =>  INSN_lwbrx,
-        2#0_11000_10101#  =>  INSN_lwzcix,
-        2#0_00001_10111#  =>  INSN_lwzux,
-        2#0_00000_10111#  =>  INSN_lwzx,
-        2#0_10010_00000#  =>  INSN_mcrxrx,
-        2#0_00000_10011#  =>  INSN_mfcr,
-        2#0_00010_10011#  =>  INSN_mfmsr,
-        2#0_01010_10011#  =>  INSN_mfspr,
-        2#0_01000_01001#  =>  INSN_modud,
-        2#0_01000_01011#  =>  INSN_moduw,
-        2#0_11000_01001#  =>  INSN_modsd,
-        2#0_11000_01011#  =>  INSN_modsw,
-        2#0_00100_10000#  =>  INSN_mtcrf,
-        2#0_00100_10010#  =>  INSN_mtmsr,
-        2#0_00101_10010#  =>  INSN_mtmsrd,
-        2#0_01110_10011#  =>  INSN_mtspr,
-        2#0_00010_01001#  =>  INSN_mulhd,
-        2#0_00000_01001#  =>  INSN_mulhdu,
-        2#0_00010_01011#  =>  INSN_mulhw,
-        2#0_00000_01011#  =>  INSN_mulhwu,
-        -- next 4 have reserved bit set
-        2#0_10010_01001#  =>  INSN_mulhd,
-        2#0_10000_01001#  =>  INSN_mulhdu,
-        2#0_10010_01011#  =>  INSN_mulhw,
-        2#0_10000_01011#  =>  INSN_mulhwu,
-        2#0_00111_01001#  =>  INSN_mulld,
-        2#0_10111_01001#  =>  INSN_mulld, -- mulldo
-        2#0_00111_01011#  =>  INSN_mullw,
-        2#0_10111_01011#  =>  INSN_mullw, -- mullwo
-        2#0_01110_11100#  =>  INSN_nand,
-        2#0_00011_01000#  =>  INSN_neg,
-        2#0_10011_01000#  =>  INSN_neg, -- nego
-        -- next 8 are reserved no-op instructions
-        2#0_10000_10010#  =>  INSN_nop,
-        2#0_10001_10010#  =>  INSN_nop,
-        2#0_10010_10010#  =>  INSN_nop,
-        2#0_10011_10010#  =>  INSN_nop,
-        2#0_10100_10010#  =>  INSN_nop,
-        2#0_10101_10010#  =>  INSN_nop,
-        2#0_10110_10010#  =>  INSN_nop,
-        2#0_10111_10010#  =>  INSN_nop,
-        2#0_00011_11100#  =>  INSN_nor,
-        2#0_01101_11100#  =>  INSN_or,
-        2#0_01100_11100#  =>  INSN_orc,
-        2#0_00011_11010#  =>  INSN_popcntb,
-        2#0_01111_11010#  =>  INSN_popcntd,
-        2#0_01011_11010#  =>  INSN_popcntw,
-        2#0_00101_11010#  =>  INSN_prtyd,
-        2#0_00100_11010#  =>  INSN_prtyw,
-        2#0_00100_00000#  =>  INSN_setb,
-        2#0_01111_10010#  =>  INSN_slbia,
-        2#0_00000_11011#  =>  INSN_sld,
-        2#0_00000_11000#  =>  INSN_slw,
-        2#0_11000_11010#  =>  INSN_srad,
-        2#0_11001_11010#  =>  INSN_sradi,
-        2#0_11001_11011#  =>  INSN_sradi,
-        2#0_11000_11000#  =>  INSN_sraw,
-        2#0_11001_11000#  =>  INSN_srawi,
-        2#0_10000_11011#  =>  INSN_srd,
-        2#0_10000_11000#  =>  INSN_srw,
-        2#0_11110_10101#  =>  INSN_stbcix,
-        2#0_10101_10110#  =>  INSN_stbcx,
-        2#0_00111_10111#  =>  INSN_stbux,
-        2#0_00110_10111#  =>  INSN_stbx,
-        2#0_10100_10100#  =>  INSN_stdbrx,
-        2#0_11111_10101#  =>  INSN_stdcix,
-        2#0_00110_10110#  =>  INSN_stdcx,
-        2#0_00101_10101#  =>  INSN_stdux,
-        2#0_00100_10101#  =>  INSN_stdx,
-        2#0_10110_10111#  =>  INSN_stfdx,
-        2#0_10111_10111#  =>  INSN_stfdux,
-        2#0_11110_10111#  =>  INSN_stfiwx,
-        2#0_10100_10111#  =>  INSN_stfsx,
-        2#0_10101_10111#  =>  INSN_stfsux,
-        2#0_11100_10110#  =>  INSN_sthbrx,
-        2#0_11101_10101#  =>  INSN_sthcix,
-        2#0_10110_10110#  =>  INSN_sthcx,
-        2#0_01101_10111#  =>  INSN_sthux,
-        2#0_01100_10111#  =>  INSN_sthx,
-        2#0_10100_10110#  =>  INSN_stwbrx,
-        2#0_11100_10101#  =>  INSN_stwcix,
-        2#0_00100_10110#  =>  INSN_stwcx,
-        2#0_00101_10111#  =>  INSN_stwux,
-        2#0_00100_10111#  =>  INSN_stwx,
-        2#0_00001_01000#  =>  INSN_subf,
-        2#0_10001_01000#  =>  INSN_subf, -- subfo
-        2#0_00000_01000#  =>  INSN_subfc,
-        2#0_10000_01000#  =>  INSN_subfc, -- subfco
-        2#0_00100_01000#  =>  INSN_subfe,
-        2#0_10100_01000#  =>  INSN_subfe, -- subfeo
-        2#0_00111_01000#  =>  INSN_subfme,
-        2#0_10111_01000#  =>  INSN_subfme, -- subfmeo
-        2#0_00110_01000#  =>  INSN_subfze,
-        2#0_10110_01000#  =>  INSN_subfze, -- subfzeo
-        2#0_10010_10110#  =>  INSN_sync,
-        2#0_00010_00100#  =>  INSN_td,
-        2#0_00000_00100#  =>  INSN_tw,
-        2#0_01001_10010#  =>  INSN_tlbie,
-        2#0_01000_10010#  =>  INSN_tlbiel,
-        2#0_10001_10110#  =>  INSN_tlbsync,
-        2#0_00000_11110#  =>  INSN_wait,
-        2#0_01001_11100#  =>  INSN_xor,
-
-        -- Major opcode 19
-        -- Columns with insn(4) = '1' are all illegal and not mapped here; to
-        -- fit into 2048 entries, the columns are remapped so that 16-24 are
-        -- stored here as 8-15; in other words the address bits are
-        -- 1, insn(10..6), 1, insn(5), insn(3..1)
-        2#1_10000_11000#  =>  INSN_bcctr,
-        2#1_00000_11000#  =>  INSN_bclr,
-        2#1_10001_11000#  =>  INSN_bctar,
-        2#1_01000_10001#  =>  INSN_crand,
-        2#1_00100_10001#  =>  INSN_crandc,
-        2#1_01001_10001#  =>  INSN_creqv,
-        2#1_00111_10001#  =>  INSN_crnand,
-        2#1_00001_10001#  =>  INSN_crnor,
-        2#1_01110_10001#  =>  INSN_cror,
-        2#1_01101_10001#  =>  INSN_crorc,
-        2#1_00110_10001#  =>  INSN_crxor,
-        2#1_00100_11110#  =>  INSN_isync,
-        2#1_00000_10000#  =>  INSN_mcrf,
-        2#1_00000_11010#  =>  INSN_rfid,
-
-        -- Major opcode 59
-        -- Only column 14 is valid here; columns 16-31 are handled in the major table
-        -- Column 14 is mapped to column 6 of the space which is
-        -- mostly used for opcode 19.
-        2#1_11010_10110#  =>  INSN_fcfids,
-        2#1_11110_10110#  =>  INSN_fcfidus,
-
-        -- Major opcode 63
-        -- Columns 0-15 are mapped here; columns 16-31 are in the major table.
-        -- Address bits are 1, insn(10:6), 0, insn(4:1)
-        2#1_00000_00000#  =>  INSN_fcmpu,
-        2#1_00001_00000#  =>  INSN_fcmpo,
-        2#1_00010_00000#  =>  INSN_mcrfs,
-        2#1_00100_00000#  =>  INSN_ftdiv,
-        2#1_00101_00000#  =>  INSN_ftsqrt,
-        2#1_00001_00110#  =>  INSN_mtfsb,
-        2#1_00010_00110#  =>  INSN_mtfsb,
-        2#1_00100_00110#  =>  INSN_mtfsfi,
-        2#1_11010_00110#  =>  INSN_fmrgow,
-        2#1_11110_00110#  =>  INSN_fmrgew,
-        2#1_10010_00111#  =>  INSN_mffs,
-        2#1_10110_00111#  =>  INSN_mtfsf,
-        2#1_00000_01000#  =>  INSN_fcpsgn,
-        2#1_00001_01000#  =>  INSN_fneg,
-        2#1_00010_01000#  =>  INSN_fmr,
-        2#1_00100_01000#  =>  INSN_fnabs,
-        2#1_01000_01000#  =>  INSN_fabs,
-        2#1_01100_01000#  =>  INSN_frin,
-        2#1_01101_01000#  =>  INSN_friz,
-        2#1_01110_01000#  =>  INSN_frip,
-        2#1_01111_01000#  =>  INSN_frim,
-        2#1_00000_01100#  =>  INSN_frsp,
-        2#1_00000_01110#  =>  INSN_fctiw,
-        2#1_00100_01110#  =>  INSN_fctiwu,
-        2#1_11001_01110#  =>  INSN_fctid,
-        2#1_11010_01110#  =>  INSN_fcfid,
-        2#1_11101_01110#  =>  INSN_fctidu,
-        2#1_11110_01110#  =>  INSN_fcfidu,
-        2#1_00000_01111#  =>  INSN_fctiwz,
-        2#1_00100_01111#  =>  INSN_fctiwuz,
-        2#1_11001_01111#  =>  INSN_fctidz,
-        2#1_11101_01111#  =>  INSN_fctiduz,
-
-        others            =>  INSN_illegal
-        );
+    signal fetch_failed : std_ulogic;
 
     -- If we have an FPU, then it is used for integer divisions,
     -- otherwise a dedicated divider in the ALU is used.
@@ -871,16 +428,18 @@ architecture behaviour of decode1 is
     end;
 
 begin
-    decode0_0: process(clk)
+    decode1_0: process(clk)
     begin
         if rising_edge(clk) then
             if rst = '1' then
-                dc0 <= dc0_t_init;
+                r <= Decode1ToDecode2Init;
+                fetch_failed <= '0';
             elsif flush_in = '1' then
-                dc0.f_in.valid <= '0';
-                dc0.f_in.fetch_failed <= '0';
+                r.valid <= '0';
+                fetch_failed <= '0';
             elsif stall_in = '0' then
-                dc0 <= dc0in;
+                r <= rin;
+                fetch_failed <= f_in.fetch_failed;
             end if;
             if rst = '1' then
                 br.br_nia <= (others => '0');
@@ -892,139 +451,73 @@ begin
         end if;
     end process;
 
-    decode0_roms: process(clk)
+    busy_out <= stall_in;
+
+    decode1_rom: process(clk)
     begin
         if rising_edge(clk) then
             if stall_in = '0' then
-                if is_X(maj_rom_addr) then
-                    major_predecode <= INSN_illegal;
-                else
-                    major_predecode <= major_predecode_rom(to_integer(unsigned(maj_rom_addr)));
-                end if;
-                if is_X(row_rom_addr) then
-                    row_predecode   <= INSN_illegal;
-                else
-                    row_predecode   <= row_predecode_rom(to_integer(unsigned(row_rom_addr)));
-                end if;
+                decode <= decode_rom(decode_rom_addr);
             end if;
         end if;
     end process;
 
-    decode0_1: process(all)
-        variable v : dc0_t;
-        variable majorop : std_ulogic_vector(5 downto 0);
-        variable majaddr : std_ulogic_vector(10 downto 0);
-        variable rowaddr : std_ulogic_vector(10 downto 0);
-        variable sprn : spr_num_t;
+    decode1_1: process(all)
+        variable v : Decode1ToDecode2Type;
+        variable vr : Decode1ToRegisterFileType;
         variable br_target : std_ulogic_vector(61 downto 0);
         variable br_offset : signed(23 downto 0);
         variable bv : br_predictor_t;
+        variable icode : insn_code;
+        variable sprn : spr_num_t;
+        variable maybe_rb : std_ulogic;
     begin
-        v := dc0_t_init;
-        v.f_in := f_in;
-
-        br_offset := (others => '0');
-
-        majorop := f_in.insn(31 downto 26);
-        majaddr := majorop & f_in.insn(4 downto 0);
-
-        -- row_predecode_rom is used for op 19, 31, 59, 63
-        -- addr bit 10 is 0 for op 31, 1 for 19, 59, 63
-        rowaddr(10) := f_in.insn(31) or not f_in.insn(29);
-        rowaddr(9 downto 5) := f_in.insn(10 downto 6);
-        if f_in.insn(28) = '0' then
-            -- op 19 and op 59
-            rowaddr(4 downto 3) := '1' & f_in.insn(5);
-        else
-            -- op 31 and 63; for 63 we only use this when f_in.insn(5) = '0'
-            rowaddr(4 downto 3) := f_in.insn(5 downto 4);
-        end if;
-        rowaddr(2 downto 0) := f_in.insn(3 downto 1);
+        v := Decode1ToDecode2Init;
 
-        maj_rom_addr <= majaddr;
-        row_rom_addr <= rowaddr;
+        v.valid := f_in.valid;
+        v.nia  := f_in.nia;
+        v.insn := f_in.insn;
+        v.stop_mark := f_in.stop_mark;
+        v.big_endian := f_in.big_endian;
 
        if is_X(f_in.insn) then
            v.spr_info := (sel => "XXX", others => 'X');
            v.ram_spr := (index => (others => 'X'), others => 'X');
        else
-           sprn := decode_spr_num(f_in.insn);
-           v.spr_info := map_spr(sprn);
-           v.ram_spr := decode_ram_spr(sprn);
-       end if;
-
-        case unsigned(majorop) is
-        when "000100" => -- 4
-            -- major opcode 4, mostly VMX/VSX stuff but also some integer ops (madd*)
-            v.override := not f_in.insn(5);
-
-        when "011111" => -- 31
-            -- major opcode 31, lots of things
-            -- Use the first half of the row table for all columns
-            v.use_row := '1';
-
-        when "010000" => -- 16
-            -- Predict backward branches as taken, forward as untaken
-            v.br_pred := f_in.insn(15);
-            br_offset := resize(signed(f_in.insn(15 downto 2)), 24);
-
-        when "010010" => -- 18
-            -- Unconditional branches are always taken
-            v.br_pred := '1';
-            br_offset := signed(f_in.insn(25 downto 2));
-
-        when "010011" => -- 19
-            -- Columns 8-15 and 24-31 don't have any valid instructions
-            -- (where insn(5..1) is the column number).
-            -- addpcis (column 2) is in the major table
-            -- Other valid columns are mapped to columns in the second
-            -- half of the row table: columns 0-1 are mapped to 16-17
-            -- and 16-23 are mapped to 24-31.
-            v.override := f_in.insn(4);
-            v.use_row := f_in.insn(5) or (not f_in.insn(3) and not f_in.insn(2));
-
-        when "011000" => -- 24
-            -- ori, special-case the standard NOP
-            if std_match(f_in.insn, "01100000000000000000000000000000") then
-                v.override := '1';
-                v.ov_insn := INSN_nop;
-            end if;
-
-        when "111011" => -- 59
-            if HAS_FPU then
-                -- floating point operations, mostly single-precision
-                -- Columns 0-11 are illegal; columns 12-15 are mapped
-                -- to columns 20-23 in the second half of the row table,
-                -- and columns 16-31 are in the major table.
-                v.override := not f_in.insn(5) and (not f_in.insn(4) or not f_in.insn(3));
-                v.use_row := not f_in.insn(5);
-            else
-                v.override := '1';
-            end if;
-
-        when "111111" => -- 63
-            if HAS_FPU then
-                -- floating point operations, general and double-precision
-                -- Use columns 0-15 of the second half of the row table
-                -- for columns 0-15, and the major table for columns 16-31.
-                v.use_row := not f_in.insn(5);
-            else
-                v.override := '1';
-            end if;
+            sprn := decode_spr_num(f_in.insn);
+            v.spr_info := map_spr(sprn);
+            v.ram_spr := decode_ram_spr(sprn);
+        end if;
 
-        when others =>
-        end case;
+        icode := f_in.icode;
 
         if f_in.fetch_failed = '1' then
-            v.override := '1';
-            v.ov_insn := INSN_fetch_fail;
+            icode := INSN_fetch_fail;
             -- Only send down a single OP_FETCH_FAILED
-            v.f_in.valid := not dc0.f_in.fetch_failed;
+            v.valid := not fetch_failed;
+        end if;
+        decode_rom_addr <= icode;
+
+        if f_in.valid = '1' then
+            report "Decode " & insn_code'image(icode) & " " & to_hstring(f_in.insn) &
+                " at " & to_hstring(f_in.nia);
         end if;
 
         -- Branch predictor
-        -- Note bclr, bcctr and bctar are predicted not taken as we have no
+        -- Note bclr, bcctr and bctar are not predicted as we have no
         -- count cache or link stack.
+        br_offset := (others => '0');
+        case icode is
+            when INSN_b =>
+                -- Unconditional branches are always taken
+                v.br_pred := '1';
+                br_offset := signed(f_in.insn(25 downto 2));
+            when INSN_bc =>
+                -- Predict backward branches as taken, forward as untaken
+                v.br_pred := f_in.insn(15);
+                br_offset := resize(signed(f_in.insn(15 downto 2)), 24);
+            when others =>
+        end case;
         bv.br_nia := f_in.nia(63 downto 2);
         if f_in.insn(1) = '1' then
             bv.br_nia := (others => '0');
@@ -1039,80 +532,16 @@ begin
         -- after a clock edge...
         br_target := std_ulogic_vector(signed(br.br_nia) + br.br_offset);
 
-        dc0in <= v;
-        br_in <= bv;
-
-        f_out.redirect <= br.predict;
-        f_out.redirect_nia <= br_target & "00";
-        flush_out <= bv.predict or br.predict;
-    end process;
-
-    decode1_0: process(clk)
-    begin
-        if rising_edge(clk) then
-            if rst = '1' then
-                r <= Decode1ToDecode2Init;
-            elsif flush_in = '1' then
-                r.valid <= '0';
-            elsif stall_in = '0' then
-                r <= rin;
-            end if;
-        end if;
-    end process;
-
-    busy_out <= stall_in;
-
-    decode1_rom: process(clk)
-    begin
-        if rising_edge(clk) then
-            if stall_in = '0' then
-                decode <= decode_rom(decode_rom_addr);
-            end if;
-        end if;
-    end process;
-
-    decode1_1: process(all)
-        variable v : Decode1ToDecode2Type;
-        variable vr : Decode1ToRegisterFileType;
-        variable icode : insn_code;
-        variable sprn : spr_num_t;
-        variable maybe_rb : std_ulogic;
-    begin
-        v := Decode1ToDecode2Init;
-
-        v.valid := dc0.f_in.valid;
-        v.nia  := dc0.f_in.nia;
-        v.insn := dc0.f_in.insn;
-        v.stop_mark := dc0.f_in.stop_mark;
-        v.big_endian := dc0.f_in.big_endian;
-        v.br_pred := dc0.br_pred;
-        v.spr_info := dc0.spr_info;
-        v.ram_spr := dc0.ram_spr;
-
-        if dc0.override = '1' then
-            icode := dc0.ov_insn;
-        elsif dc0.use_row = '0' then
-            icode := major_predecode;
-        else
-            icode := row_predecode;
-        end if;
-        decode_rom_addr <= icode;
-
-        if dc0.f_in.valid = '1' then
-            report "Decode insn " & to_hstring(dc0.f_in.insn) & " at " & to_hstring(dc0.f_in.nia) &
-                " code " & insn_code'image(icode);
-        end if;
-
         -- Work out GPR/FPR read addresses
         maybe_rb := '0';
-        vr.reg_1_addr := '0' & insn_ra(dc0.f_in.insn);
-        vr.reg_2_addr := '0' & insn_rb(dc0.f_in.insn);
-        vr.reg_3_addr := '0' & insn_rs(dc0.f_in.insn);
+        vr.reg_1_addr := '0' & insn_ra(f_in.insn);
+        vr.reg_2_addr := '0' & insn_rb(f_in.insn);
+        vr.reg_3_addr := '0' & insn_rs(f_in.insn);
         if icode >= INSN_first_rb then
             maybe_rb := '1';
             if icode < INSN_first_frs then
                 if icode >= INSN_first_rc then
-                    vr.reg_3_addr := '0' & insn_rcreg(dc0.f_in.insn);
+                    vr.reg_3_addr := '0' & insn_rcreg(f_in.insn);
                 end if;
             else
                 -- access FRS operand
@@ -1124,13 +553,13 @@ begin
                 end if;
                 if icode >= INSN_first_frabc then
                     -- access FRC operand
-                    vr.reg_3_addr := '1' & insn_rcreg(dc0.f_in.insn);
+                    vr.reg_3_addr := '1' & insn_rcreg(f_in.insn);
                 end if;
             end if;
         end if;
-        vr.read_1_enable := dc0.f_in.valid and not dc0.f_in.fetch_failed;
-        vr.read_2_enable := dc0.f_in.valid and not dc0.f_in.fetch_failed and maybe_rb;
-        vr.read_3_enable := dc0.f_in.valid and not dc0.f_in.fetch_failed;
+        vr.read_1_enable := f_in.valid;
+        vr.read_2_enable := f_in.valid and maybe_rb;
+        vr.read_3_enable := f_in.valid;
 
         v.reg_a := vr.reg_1_addr;
         v.reg_b := vr.reg_2_addr;
@@ -1138,11 +567,15 @@ begin
 
         -- Update registers
         rin <= v;
+        br_in <= bv;
 
         -- Update outputs
         d_out <= r;
         d_out.decode <= decode;
         r_out <= vr;
+        f_out.redirect <= br.predict;
+        f_out.redirect_nia <= br_target & "00";
+        flush_out <= bv.predict or br.predict;
     end process;
 
     d1_log: if LOG_LENGTH > 0 generate
index 9eb08c13076c96f6ae540ce0178a4651b262a792..63de22960b0462a7694f70d4a9c172eceb3e6fc4 100644 (file)
@@ -23,6 +23,7 @@ use ieee.numeric_std.all;
 library work;
 use work.utils.all;
 use work.common.all;
+use work.decode_types.all;
 use work.wishbone_types.all;
 
 -- 64 bit direct mapped icache. All instructions are 4B aligned.
@@ -30,6 +31,7 @@ use work.wishbone_types.all;
 entity icache is
     generic (
         SIM : boolean := false;
+        HAS_FPU : boolean := true;
         -- Line size in bytes
         LINE_SIZE : positive := 64;
         -- BRAM organisation: We never access more than wishbone_data_bits at
@@ -122,8 +124,20 @@ architecture rtl of icache is
     subtype way_t is integer range 0 to NUM_WAYS-1;
     subtype row_in_line_t is unsigned(ROW_LINEBITS-1 downto 0);
 
+    -- We store a pre-decoded 10-bit insn_code along with the bottom 26 bits of
+    -- each instruction, giving a total of 36 bits per instruction, which
+    -- fits neatly into the block RAMs available on FPGAs.
+    -- For illegal instructions, the top 4 bits of that 10-bit field are ones and
+    -- its bottom 6 bits hold the primary opcode, so we have the whole instruction
+    -- word available (e.g. to put in HEIR).  For other instructions, the
+    -- primary opcode is not stored but could be determined from the insn_code.
+    constant PREDECODE_BITS : natural := 10;
+    constant INSN_IMAGE_BITS : natural := 26;
+    constant ICWORDLEN : natural := PREDECODE_BITS + INSN_IMAGE_BITS;
+    constant ROW_WIDTH : natural := INSN_PER_ROW * ICWORDLEN;
+
     -- The cache data BRAM organized as described above for each way
-    subtype cache_row_t is std_ulogic_vector(ROW_SIZE_BITS-1 downto 0);
+    subtype cache_row_t is std_ulogic_vector(ROW_WIDTH-1 downto 0);
 
     -- The cache tags LUTRAM has a row per set. Vivado is a pain and will
     -- not handle a clean (commented) definition of the cache tags as a 3d
@@ -184,6 +198,8 @@ architecture rtl of icache is
         wb               : wishbone_master_out;
        store_way        : way_t;
         store_index      : index_t;
+        recv_row         : row_t;
+        recv_valid       : std_ulogic;
        store_row        : row_t;
         store_tag        : cache_tag_t;
         store_valid      : std_ulogic;
@@ -214,7 +230,9 @@ architecture rtl of icache is
 
     -- Cache RAM interface
     type cache_ram_out_t is array(way_t) of cache_row_t;
-    signal cache_out   : cache_ram_out_t;
+    signal cache_out     : cache_ram_out_t;
+    signal cache_wr_data : std_ulogic_vector(ROW_WIDTH - 1 downto 0);
+    signal wb_rd_data    : std_ulogic_vector(ROW_SIZE_BITS - 1 downto 0);
 
     -- PLRU output interface
     type plru_out_t is array(index_t) of std_ulogic_vector(WAY_BITS-1 downto 0);
@@ -293,7 +311,7 @@ architecture rtl of icache is
        variable word: integer range 0 to INSN_PER_ROW-1;
     begin
         word := to_integer(unsigned(addr(INSN_BITS+2-1 downto 2)));
-       return data(31+word*32 downto word*32);
+       return data(word * ICWORDLEN + ICWORDLEN - 1 downto word * ICWORDLEN);
     end;
 
     -- Get the tag value from the address
@@ -327,6 +345,34 @@ architecture rtl of icache is
 
 begin
 
+    -- byte-swap read data if big endian
+    process(all)
+        variable j: integer;
+    begin
+        if r.store_tag(TAG_BITS - 1) = '0' then
+            wb_rd_data <= wishbone_in.dat;
+        else
+            for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop
+                j := ((ii / 4) * 4) + (3 - (ii mod 4));
+                wb_rd_data(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
+            end loop;
+        end if;
+    end process;
+
+    predecoder_0: entity work.predecoder
+        generic map (
+            HAS_FPU => HAS_FPU,
+            WIDTH => INSN_PER_ROW,
+            ICODE_LEN => PREDECODE_BITS,
+            IMAGE_LEN => INSN_IMAGE_BITS
+            )
+        port map (
+            clk => clk,
+            valid_in => wishbone_in.ack,
+            insns_in => wb_rd_data,
+            icodes_out => cache_wr_data
+            );
+
     assert LINE_SIZE mod ROW_SIZE = 0;
     assert ispow2(LINE_SIZE)    report "LINE_SIZE not power of 2" severity FAILURE;
     assert ispow2(NUM_LINES)    report "NUM_LINES not power of 2" severity FAILURE;
@@ -367,13 +413,13 @@ begin
        signal rd_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
        signal wr_addr  : std_ulogic_vector(ROW_BITS-1 downto 0);
        signal dout     : cache_row_t;
-       signal wr_sel   : std_ulogic_vector(ROW_SIZE-1 downto 0);
-        signal wr_dat   : std_ulogic_vector(wishbone_in.dat'left downto 0);
+       signal wr_sel   : std_ulogic_vector(0 downto 0);
     begin
        way: entity work.cache_ram
            generic map (
                ROW_BITS => ROW_BITS,
-               WIDTH => ROW_SIZE_BITS
+               WIDTH => ROW_WIDTH,
+                BYTEWID => ROW_WIDTH
                )
            port map (
                clk     => clk,
@@ -382,31 +428,19 @@ begin
                rd_data => dout,
                wr_sel  => wr_sel,
                wr_addr => wr_addr,
-               wr_data => wr_dat
+               wr_data => cache_wr_data
                );
        process(all)
-            variable j: integer;
        begin
-            -- byte-swap read data if big endian
-            if r.store_tag(TAG_BITS - 1) = '0' then
-                wr_dat <= wishbone_in.dat;
-            else
-                for ii in 0 to (wishbone_in.dat'length / 8) - 1 loop
-                    j := ((ii / 4) * 4) + (3 - (ii mod 4));
-                    wr_dat(ii * 8 + 7 downto ii * 8) <= wishbone_in.dat(j * 8 + 7 downto j * 8);
-                end loop;
-            end if;
            do_read <= not stall_in;
            do_write <= '0';
-           if wishbone_in.ack = '1' and replace_way = i then
+           if r.recv_valid = '1' and r.store_way = i then
                do_write <= '1';
            end if;
            cache_out(i) <= dout;
            rd_addr <= std_ulogic_vector(to_unsigned(req_row, ROW_BITS));
            wr_addr <= std_ulogic_vector(to_unsigned(r.store_row, ROW_BITS));
-            for ii in 0 to ROW_SIZE-1 loop
-                wr_sel(ii) <= do_write;
-            end loop;
+            wr_sel(0) <= do_write;
        end process;
     end generate;
     
@@ -515,6 +549,8 @@ begin
     icache_comb : process(all)
        variable is_hit  : std_ulogic;
        variable hit_way : way_t;
+        variable insn    : std_ulogic_vector(ICWORDLEN - 1 downto 0);
+        variable icode   : insn_code;
     begin
        -- Extract line, row and tag from request
        if not is_X(i_in.nia) then
@@ -575,11 +611,18 @@ begin
        --       I prefer not to do just yet as it would force fetch2 to know about
        --       some of the cache geometry information.
        --
+        insn := (others => '0');
+        icode := INSN_illegal;
        if r.hit_valid = '1' then
-           i_out.insn <= read_insn_word(r.hit_nia, cache_out(r.hit_way));
-       else
-            i_out.insn <= (others => '0');
+            insn := read_insn_word(r.hit_nia, cache_out(r.hit_way));
+            -- Currently we use only the top bit for indicating illegal
+            -- instructions because we know that insn_codes fit into 9 bits.
+            if insn(ICWORDLEN - 1) = '0' then
+                icode := insn_code'val(to_integer(unsigned(insn(ICWORDLEN-1 downto INSN_IMAGE_BITS))));
+            end if;
        end if;
+        i_out.insn <= insn(31 downto 0);
+        i_out.icode <= icode;
        i_out.valid <= r.hit_valid;
        i_out.nia <= r.hit_nia;
        i_out.stop_mark <= r.hit_smark;
@@ -640,9 +683,11 @@ begin
         variable snoop_addr : real_addr_t;
         variable snoop_tag : cache_tag_t;
         variable snoop_cache_tags : cache_tags_set_t;
+        variable replace_way : way_t;
     begin
         if rising_edge(clk) then
             ev.icache_miss <= '0';
+            r.recv_valid <= '0';
            -- On reset, clear all valid bits to force misses
             if rst = '1' then
                for i in index_t loop
@@ -714,13 +759,13 @@ begin
                             " IR:" & std_ulogic'image(i_in.virt_mode) &
                            " SM:" & std_ulogic'image(i_in.stop_mark) &
                            " idx:" & integer'image(req_index) &
-                           " way:" & integer'image(replace_way) &
                            " tag:" & to_hstring(req_tag) &
                             " RA:" & to_hstring(real_addr);
                         ev.icache_miss <= '1';
 
                        -- Keep track of our index and way for subsequent stores
                        r.store_index <= req_index;
+                        r.recv_row <= get_row(req_raddr);
                        r.store_row <= get_row(req_raddr);
                         r.store_tag <= req_tag;
                         r.store_valid <= '1';
@@ -740,6 +785,7 @@ begin
                when CLR_TAG | WAIT_ACK =>
                     if r.state = CLR_TAG then
                         -- Get victim way from plru
+                        replace_way := to_integer(unsigned(plru_victim(r.store_index)));
                        r.store_way <= replace_way;
 
                        -- Force misses on that way while reloading that line
@@ -757,6 +803,19 @@ begin
                         r.state <= WAIT_ACK;
                     end if;
 
+                    -- If we are writing in this cycle, mark row valid and see if we are done
+                    if r.recv_valid = '1' then
+                        r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in;
+                       if is_last_row(r.store_row, r.end_row_ix) then
+                           -- Cache line is now valid
+                           cache_valids(r.store_index)(r.store_way) <= r.store_valid and not inval_in;
+                           -- We are done
+                           r.state <= IDLE;
+                       end if;
+                       -- Advance store row counter to the current receive row
+                       r.store_row <= r.recv_row;
+                    end if;
+
                    -- If we are still sending requests, was one accepted ?
                    if wishbone_in.stall = '0' and r.wb.stb = '1' then
                        -- That was the last word ? We are done sending. Clear stb.
@@ -777,33 +836,27 @@ begin
 
                    -- Incoming acks processing
                    if wishbone_in.ack = '1' then
-                        r.rows_valid(r.store_row mod ROW_PER_LINE) <= not inval_in;
                        -- Check for completion
-                       if is_last_row(r.store_row, r.end_row_ix) then
+                       if is_last_row(r.recv_row, r.end_row_ix) then
                            -- Complete wishbone cycle
                            r.wb.cyc <= '0';
-
-                           -- Cache line is now valid
-                           cache_valids(r.store_index)(replace_way) <= r.store_valid and not inval_in;
-
-                           -- We are done
-                           r.state <= IDLE;
                        end if;
+                        r.recv_valid <= '1';
 
-                       -- Increment store row counter
-                       r.store_row <= next_row(r.store_row);
+                       -- Increment receive row counter
+                       r.recv_row <= next_row(r.recv_row);
                    end if;
 
                 when STOP_RELOAD =>
                     -- Wait for all outstanding requests to be satisfied, then
                     -- go to IDLE state.
-                    if get_row_of_line(r.store_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then
+                    if get_row_of_line(r.recv_row) = get_row_of_line(get_row(wb_to_addr(r.wb.adr))) then
                         r.wb.cyc <= '0';
                         r.state <= IDLE;
                     end if;
                     if wishbone_in.ack = '1' then
-                       -- Increment store row counter
+                       -- Increment receive row counter
-                       r.store_row <= next_row(r.store_row);
+                       r.recv_row <= next_row(r.recv_row);
                    end if;
                end case;
            end if;
index 46e114eea3fa8e734ea79a4e4eef7d9cf2487250..4c8695ee1793489399078fde730bbe356ed5220b 100644 (file)
@@ -9,6 +9,7 @@ filesets:
       - wishbone_types.vhdl
       - common.vhdl
       - fetch1.vhdl
+      - predecode.vhdl
       - decode1.vhdl
       - helpers.vhdl
       - decode2.vhdl
diff --git a/predecode.vhdl b/predecode.vhdl
new file mode 100644 (file)
index 0000000..1e1d85b
--- /dev/null
@@ -0,0 +1,582 @@
+-- Instruction pre-decoder for microwatt
+-- One cycle latency.  Does 'WIDTH' instructions in parallel.
+
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+
+library work;
+use work.common.all;
+use work.decode_types.all;
+use work.insn_helpers.all;
+
+entity predecoder is
+    generic (
+        HAS_FPU   : boolean := true;
+        WIDTH     : natural := 2;
+        ICODE_LEN : natural := 10;
+        IMAGE_LEN : natural := 26
+        );
+    port (
+        clk        : in  std_ulogic;
+        valid_in   : in  std_ulogic;
+        insns_in   : in  std_ulogic_vector(WIDTH * 32 - 1 downto 0);
+        icodes_out : out std_ulogic_vector(WIDTH * (ICODE_LEN + IMAGE_LEN) - 1 downto 0)
+        );
+end entity predecoder;
+
+architecture behaviour of predecoder is
+
+    type predecoder_rom_t is array(0 to 2047) of insn_code;
+
+    constant major_predecode_rom : predecoder_rom_t := (
+        2#001100_00000# to 2#001100_11111# =>  INSN_addic,
+        2#001101_00000# to 2#001101_11111# =>  INSN_addic_dot,
+        2#001110_00000# to 2#001110_11111# =>  INSN_addi,
+        2#001111_00000# to 2#001111_11111# =>  INSN_addis,
+        2#010011_00100# to 2#010011_00101# =>  INSN_addpcis,
+        2#011100_00000# to 2#011100_11111# =>  INSN_andi_dot,
+        2#011101_00000# to 2#011101_11111# =>  INSN_andis_dot,
+        2#000000_00000#                    =>  INSN_attn,
+        2#010010_00000# to 2#010010_11111# =>  INSN_b,
+        2#010000_00000# to 2#010000_11111# =>  INSN_bc,
+        2#001011_00000# to 2#001011_11111# =>  INSN_cmpi,
+        2#001010_00000# to 2#001010_11111# =>  INSN_cmpli,
+        2#100010_00000# to 2#100010_11111# =>  INSN_lbz,
+        2#100011_00000# to 2#100011_11111# =>  INSN_lbzu,
+        2#110010_00000# to 2#110010_11111# =>  INSN_lfd,
+        2#110011_00000# to 2#110011_11111# =>  INSN_lfdu,
+        2#110000_00000# to 2#110000_11111# =>  INSN_lfs,
+        2#110001_00000# to 2#110001_11111# =>  INSN_lfsu,
+        2#101010_00000# to 2#101010_11111# =>  INSN_lha,
+        2#101011_00000# to 2#101011_11111# =>  INSN_lhau,
+        2#101000_00000# to 2#101000_11111# =>  INSN_lhz,
+        2#101001_00000# to 2#101001_11111# =>  INSN_lhzu,
+        2#100000_00000# to 2#100000_11111# =>  INSN_lwz,
+        2#100001_00000# to 2#100001_11111# =>  INSN_lwzu,
+        2#000111_00000# to 2#000111_11111# =>  INSN_mulli,
+        2#011000_00000# to 2#011000_11111# =>  INSN_ori,
+        2#011001_00000# to 2#011001_11111# =>  INSN_oris,
+        2#010100_00000# to 2#010100_11111# =>  INSN_rlwimi,
+        2#010101_00000# to 2#010101_11111# =>  INSN_rlwinm,
+        2#010111_00000# to 2#010111_11111# =>  INSN_rlwnm,
+        2#010001_00000# to 2#010001_11111# =>  INSN_sc,
+        2#100110_00000# to 2#100110_11111# =>  INSN_stb,
+        2#100111_00000# to 2#100111_11111# =>  INSN_stbu,
+        2#110110_00000# to 2#110110_11111# =>  INSN_stfd,
+        2#110111_00000# to 2#110111_11111# =>  INSN_stfdu,
+        2#110100_00000# to 2#110100_11111# =>  INSN_stfs,
+        2#110101_00000# to 2#110101_11111# =>  INSN_stfsu,
+        2#101100_00000# to 2#101100_11111# =>  INSN_sth,
+        2#101101_00000# to 2#101101_11111# =>  INSN_sthu,
+        2#100100_00000# to 2#100100_11111# =>  INSN_stw,
+        2#100101_00000# to 2#100101_11111# =>  INSN_stwu,
+        2#001000_00000# to 2#001000_11111# =>  INSN_subfic,
+        2#000010_00000# to 2#000010_11111# =>  INSN_tdi,
+        2#000011_00000# to 2#000011_11111# =>  INSN_twi,
+        2#011010_00000# to 2#011010_11111# =>  INSN_xori,
+        2#011011_00000# to 2#011011_11111# =>  INSN_xoris,
+        -- major opcode 4
+        2#000100_10000#                    =>  INSN_maddhd,
+        2#000100_10001#                    =>  INSN_maddhdu,
+        2#000100_10011#                    =>  INSN_maddld,
+        -- major opcode 30
+        2#011110_01000# to 2#011110_01001# =>  INSN_rldic,
+        2#011110_01010# to 2#011110_01011# =>  INSN_rldic,
+        2#011110_00000# to 2#011110_00001# =>  INSN_rldicl,
+        2#011110_00010# to 2#011110_00011# =>  INSN_rldicl,
+        2#011110_00100# to 2#011110_00101# =>  INSN_rldicr,
+        2#011110_00110# to 2#011110_00111# =>  INSN_rldicr,
+        2#011110_01100# to 2#011110_01101# =>  INSN_rldimi,
+        2#011110_01110# to 2#011110_01111# =>  INSN_rldimi,
+        2#011110_10000# to 2#011110_10001# =>  INSN_rldcl,
+        2#011110_10010# to 2#011110_10011# =>  INSN_rldcr,
+        -- major opcode 58
+        2#111010_00000#                    =>  INSN_ld,
+        2#111010_00001#                    =>  INSN_ldu,
+        2#111010_00010#                    =>  INSN_lwa,
+        2#111010_00100#                    =>  INSN_ld,
+        2#111010_00101#                    =>  INSN_ldu,
+        2#111010_00110#                    =>  INSN_lwa,
+        2#111010_01000#                    =>  INSN_ld,
+        2#111010_01001#                    =>  INSN_ldu,
+        2#111010_01010#                    =>  INSN_lwa,
+        2#111010_01100#                    =>  INSN_ld,
+        2#111010_01101#                    =>  INSN_ldu,
+        2#111010_01110#                    =>  INSN_lwa,
+        2#111010_10000#                    =>  INSN_ld,
+        2#111010_10001#                    =>  INSN_ldu,
+        2#111010_10010#                    =>  INSN_lwa,
+        2#111010_10100#                    =>  INSN_ld,
+        2#111010_10101#                    =>  INSN_ldu,
+        2#111010_10110#                    =>  INSN_lwa,
+        2#111010_11000#                    =>  INSN_ld,
+        2#111010_11001#                    =>  INSN_ldu,
+        2#111010_11010#                    =>  INSN_lwa,
+        2#111010_11100#                    =>  INSN_ld,
+        2#111010_11101#                    =>  INSN_ldu,
+        2#111010_11110#                    =>  INSN_lwa,
+        -- major opcode 59
+        2#111011_00100# to 2#111011_00101# =>  INSN_fdivs,
+        2#111011_01000# to 2#111011_01001# =>  INSN_fsubs,
+        2#111011_01010# to 2#111011_01011# =>  INSN_fadds,
+        2#111011_01100# to 2#111011_01101# =>  INSN_fsqrts,
+        2#111011_10000# to 2#111011_10001# =>  INSN_fres,
+        2#111011_10010# to 2#111011_10011# =>  INSN_fmuls,
+        2#111011_10100# to 2#111011_10101# =>  INSN_frsqrtes,
+        2#111011_11000# to 2#111011_11001# =>  INSN_fmsubs,
+        2#111011_11010# to 2#111011_11011# =>  INSN_fmadds,
+        2#111011_11100# to 2#111011_11101# =>  INSN_fnmsubs,
+        2#111011_11110# to 2#111011_11111# =>  INSN_fnmadds,
+        -- major opcode 62
+        2#111110_00000#                    =>  INSN_std,
+        2#111110_00001#                    =>  INSN_stdu,
+        2#111110_00100#                    =>  INSN_std,
+        2#111110_00101#                    =>  INSN_stdu,
+        2#111110_01000#                    =>  INSN_std,
+        2#111110_01001#                    =>  INSN_stdu,
+        2#111110_01100#                    =>  INSN_std,
+        2#111110_01101#                    =>  INSN_stdu,
+        2#111110_10000#                    =>  INSN_std,
+        2#111110_10001#                    =>  INSN_stdu,
+        2#111110_10100#                    =>  INSN_std,
+        2#111110_10101#                    =>  INSN_stdu,
+        2#111110_11000#                    =>  INSN_std,
+        2#111110_11001#                    =>  INSN_stdu,
+        2#111110_11100#                    =>  INSN_std,
+        2#111110_11101#                    =>  INSN_stdu,
+        -- major opcode 63
+        2#111111_00100# to 2#111111_00101# =>  INSN_fdiv,
+        2#111111_01000# to 2#111111_01001# =>  INSN_fsub,
+        2#111111_01010# to 2#111111_01011# =>  INSN_fadd,
+        2#111111_01100# to 2#111111_01101# =>  INSN_fsqrt,
+        2#111111_01110# to 2#111111_01111# =>  INSN_fsel,
+        2#111111_10000# to 2#111111_10001# =>  INSN_fre,
+        2#111111_10010# to 2#111111_10011# =>  INSN_fmul,
+        2#111111_10100# to 2#111111_10101# =>  INSN_frsqrte,
+        2#111111_11000# to 2#111111_11001# =>  INSN_fmsub,
+        2#111111_11010# to 2#111111_11011# =>  INSN_fmadd,
+        2#111111_11100# to 2#111111_11101# =>  INSN_fnmsub,
+        2#111111_11110# to 2#111111_11111# =>  INSN_fnmadd,
+        others                             =>  INSN_illegal
+        );
+
+    constant row_predecode_rom : predecoder_rom_t := (
+        -- Major opcode 31
+        -- Address bits are 0, insn(10:1)
+        2#0_01000_01010#  =>  INSN_add,
+        2#0_11000_01010#  =>  INSN_add, -- addo
+        2#0_00000_01010#  =>  INSN_addc,
+        2#0_10000_01010#  =>  INSN_addc, -- addco
+        2#0_00100_01010#  =>  INSN_adde,
+        2#0_10100_01010#  =>  INSN_adde, -- addeo
+        2#0_00101_01010#  =>  INSN_addex,
+        2#0_00010_01010#  =>  INSN_addg6s,
+        2#0_00111_01010#  =>  INSN_addme,
+        2#0_10111_01010#  =>  INSN_addme, -- addmeo
+        2#0_00110_01010#  =>  INSN_addze,
+        2#0_10110_01010#  =>  INSN_addze, -- addzeo
+        2#0_00000_11100#  =>  INSN_and,
+        2#0_00001_11100#  =>  INSN_andc,
+        2#0_00111_11100#  =>  INSN_bperm,
+        2#0_01001_11010#  =>  INSN_cbcdtd,
+        2#0_01000_11010#  =>  INSN_cdtbcd,
+        2#0_00000_00000#  =>  INSN_cmp,
+        2#0_01111_11100#  =>  INSN_cmpb,
+        2#0_00111_00000#  =>  INSN_cmpeqb,
+        2#0_00001_00000#  =>  INSN_cmpl,
+        2#0_00110_00000#  =>  INSN_cmprb,
+        2#0_00001_11010#  =>  INSN_cntlzd,
+        2#0_00000_11010#  =>  INSN_cntlzw,
+        2#0_10001_11010#  =>  INSN_cnttzd,
+        2#0_10000_11010#  =>  INSN_cnttzw,
+        2#0_10111_10011#  =>  INSN_darn,
+        2#0_00010_10110#  =>  INSN_dcbf,
+        2#0_00001_10110#  =>  INSN_dcbst,
+        2#0_01000_10110#  =>  INSN_dcbt,
+        2#0_00111_10110#  =>  INSN_dcbtst,
+        2#0_11111_10110#  =>  INSN_dcbz,
+        2#0_01100_01001#  =>  INSN_divdeu,
+        2#0_11100_01001#  =>  INSN_divdeu, -- divdeuo
+        2#0_01100_01011#  =>  INSN_divweu,
+        2#0_11100_01011#  =>  INSN_divweu, -- divweuo
+        2#0_01101_01001#  =>  INSN_divde,
+        2#0_11101_01001#  =>  INSN_divde, -- divdeo
+        2#0_01101_01011#  =>  INSN_divwe,
+        2#0_11101_01011#  =>  INSN_divwe, -- divweo
+        2#0_01110_01001#  =>  INSN_divdu,
+        2#0_11110_01001#  =>  INSN_divdu, -- divduo
+        2#0_01110_01011#  =>  INSN_divwu,
+        2#0_11110_01011#  =>  INSN_divwu, -- divwuo
+        2#0_01111_01001#  =>  INSN_divd,
+        2#0_11111_01001#  =>  INSN_divd, -- divdo
+        2#0_01111_01011#  =>  INSN_divw,
+        2#0_11111_01011#  =>  INSN_divw, -- divwo
+        2#0_11001_10110#  =>  INSN_nop, -- dss
+        2#0_01010_10110#  =>  INSN_nop, -- dst
+        2#0_01011_10110#  =>  INSN_nop, -- dstst
+        2#0_11010_10110#  =>  INSN_eieio,
+        2#0_01000_11100#  =>  INSN_eqv,
+        2#0_11101_11010#  =>  INSN_extsb,
+        2#0_11100_11010#  =>  INSN_extsh,
+        2#0_11110_11010#  =>  INSN_extsw,
+        2#0_11011_11010#  =>  INSN_extswsli,
+        2#0_11011_11011#  =>  INSN_extswsli,
+        2#0_11110_10110#  =>  INSN_icbi,
+        2#0_00000_10110#  =>  INSN_icbt,
+        2#0_00000_01111#  =>  INSN_isel,
+        2#0_00001_01111#  =>  INSN_isel,
+        2#0_00010_01111#  =>  INSN_isel,
+        2#0_00011_01111#  =>  INSN_isel,
+        2#0_00100_01111#  =>  INSN_isel,
+        2#0_00101_01111#  =>  INSN_isel,
+        2#0_00110_01111#  =>  INSN_isel,
+        2#0_00111_01111#  =>  INSN_isel,
+        2#0_01000_01111#  =>  INSN_isel,
+        2#0_01001_01111#  =>  INSN_isel,
+        2#0_01010_01111#  =>  INSN_isel,
+        2#0_01011_01111#  =>  INSN_isel,
+        2#0_01100_01111#  =>  INSN_isel,
+        2#0_01101_01111#  =>  INSN_isel,
+        2#0_01110_01111#  =>  INSN_isel,
+        2#0_01111_01111#  =>  INSN_isel,
+        2#0_10000_01111#  =>  INSN_isel,
+        2#0_10001_01111#  =>  INSN_isel,
+        2#0_10010_01111#  =>  INSN_isel,
+        2#0_10011_01111#  =>  INSN_isel,
+        2#0_10100_01111#  =>  INSN_isel,
+        2#0_10101_01111#  =>  INSN_isel,
+        2#0_10110_01111#  =>  INSN_isel,
+        2#0_10111_01111#  =>  INSN_isel,
+        2#0_11000_01111#  =>  INSN_isel,
+        2#0_11001_01111#  =>  INSN_isel,
+        2#0_11010_01111#  =>  INSN_isel,
+        2#0_11011_01111#  =>  INSN_isel,
+        2#0_11100_01111#  =>  INSN_isel,
+        2#0_11101_01111#  =>  INSN_isel,
+        2#0_11110_01111#  =>  INSN_isel,
+        2#0_11111_01111#  =>  INSN_isel,
+        2#0_00001_10100#  =>  INSN_lbarx,
+        2#0_11010_10101#  =>  INSN_lbzcix,
+        2#0_00011_10111#  =>  INSN_lbzux,
+        2#0_00010_10111#  =>  INSN_lbzx,
+        2#0_00010_10100#  =>  INSN_ldarx,
+        2#0_10000_10100#  =>  INSN_ldbrx,
+        2#0_11011_10101#  =>  INSN_ldcix,
+        2#0_00001_10101#  =>  INSN_ldux,
+        2#0_00000_10101#  =>  INSN_ldx,
+        2#0_10010_10111#  =>  INSN_lfdx,
+        2#0_10011_10111#  =>  INSN_lfdux,
+        2#0_11010_10111#  =>  INSN_lfiwax,
+        2#0_11011_10111#  =>  INSN_lfiwzx,
+        2#0_10000_10111#  =>  INSN_lfsx,
+        2#0_10001_10111#  =>  INSN_lfsux,
+        2#0_00011_10100#  =>  INSN_lharx,
+        2#0_01011_10111#  =>  INSN_lhaux,
+        2#0_01010_10111#  =>  INSN_lhax,
+        2#0_11000_10110#  =>  INSN_lhbrx,
+        2#0_11001_10101#  =>  INSN_lhzcix,
+        2#0_01001_10111#  =>  INSN_lhzux,
+        2#0_01000_10111#  =>  INSN_lhzx,
+        2#0_00000_10100#  =>  INSN_lwarx,
+        2#0_01011_10101#  =>  INSN_lwaux,
+        2#0_01010_10101#  =>  INSN_lwax,
+        2#0_10000_10110#  =>  INSN_lwbrx,
+        2#0_11000_10101#  =>  INSN_lwzcix,
+        2#0_00001_10111#  =>  INSN_lwzux,
+        2#0_00000_10111#  =>  INSN_lwzx,
+        2#0_10010_00000#  =>  INSN_mcrxrx,
+        2#0_00000_10011#  =>  INSN_mfcr,
+        2#0_00010_10011#  =>  INSN_mfmsr,
+        2#0_01010_10011#  =>  INSN_mfspr,
+        2#0_01000_01001#  =>  INSN_modud,
+        2#0_01000_01011#  =>  INSN_moduw,
+        2#0_11000_01001#  =>  INSN_modsd,
+        2#0_11000_01011#  =>  INSN_modsw,
+        2#0_00100_10000#  =>  INSN_mtcrf,
+        2#0_00100_10010#  =>  INSN_mtmsr,
+        2#0_00101_10010#  =>  INSN_mtmsrd,
+        2#0_01110_10011#  =>  INSN_mtspr,
+        2#0_00010_01001#  =>  INSN_mulhd,
+        2#0_00000_01001#  =>  INSN_mulhdu,
+        2#0_00010_01011#  =>  INSN_mulhw,
+        2#0_00000_01011#  =>  INSN_mulhwu,
+        -- next 4 have reserved bit set
+        2#0_10010_01001#  =>  INSN_mulhd,
+        2#0_10000_01001#  =>  INSN_mulhdu,
+        2#0_10010_01011#  =>  INSN_mulhw,
+        2#0_10000_01011#  =>  INSN_mulhwu,
+        2#0_00111_01001#  =>  INSN_mulld,
+        2#0_10111_01001#  =>  INSN_mulld, -- mulldo
+        2#0_00111_01011#  =>  INSN_mullw,
+        2#0_10111_01011#  =>  INSN_mullw, -- mullwo
+        2#0_01110_11100#  =>  INSN_nand,
+        2#0_00011_01000#  =>  INSN_neg,
+        2#0_10011_01000#  =>  INSN_neg, -- nego
+        -- next 8 are reserved no-op instructions
+        2#0_10000_10010#  =>  INSN_nop,
+        2#0_10001_10010#  =>  INSN_nop,
+        2#0_10010_10010#  =>  INSN_nop,
+        2#0_10011_10010#  =>  INSN_nop,
+        2#0_10100_10010#  =>  INSN_nop,
+        2#0_10101_10010#  =>  INSN_nop,
+        2#0_10110_10010#  =>  INSN_nop,
+        2#0_10111_10010#  =>  INSN_nop,
+        2#0_00011_11100#  =>  INSN_nor,
+        2#0_01101_11100#  =>  INSN_or,
+        2#0_01100_11100#  =>  INSN_orc,
+        2#0_00011_11010#  =>  INSN_popcntb,
+        2#0_01111_11010#  =>  INSN_popcntd,
+        2#0_01011_11010#  =>  INSN_popcntw,
+        2#0_00101_11010#  =>  INSN_prtyd,
+        2#0_00100_11010#  =>  INSN_prtyw,
+        2#0_00100_00000#  =>  INSN_setb,
+        2#0_01111_10010#  =>  INSN_slbia,
+        2#0_00000_11011#  =>  INSN_sld,
+        2#0_00000_11000#  =>  INSN_slw,
+        2#0_11000_11010#  =>  INSN_srad,
+        2#0_11001_11010#  =>  INSN_sradi,
+        2#0_11001_11011#  =>  INSN_sradi,
+        2#0_11000_11000#  =>  INSN_sraw,
+        2#0_11001_11000#  =>  INSN_srawi,
+        2#0_10000_11011#  =>  INSN_srd,
+        2#0_10000_11000#  =>  INSN_srw,
+        2#0_11110_10101#  =>  INSN_stbcix,
+        2#0_10101_10110#  =>  INSN_stbcx,
+        2#0_00111_10111#  =>  INSN_stbux,
+        2#0_00110_10111#  =>  INSN_stbx,
+        2#0_10100_10100#  =>  INSN_stdbrx,
+        2#0_11111_10101#  =>  INSN_stdcix,
+        2#0_00110_10110#  =>  INSN_stdcx,
+        2#0_00101_10101#  =>  INSN_stdux,
+        2#0_00100_10101#  =>  INSN_stdx,
+        2#0_10110_10111#  =>  INSN_stfdx,
+        2#0_10111_10111#  =>  INSN_stfdux,
+        2#0_11110_10111#  =>  INSN_stfiwx,
+        2#0_10100_10111#  =>  INSN_stfsx,
+        2#0_10101_10111#  =>  INSN_stfsux,
+        2#0_11100_10110#  =>  INSN_sthbrx,
+        2#0_11101_10101#  =>  INSN_sthcix,
+        2#0_10110_10110#  =>  INSN_sthcx,
+        2#0_01101_10111#  =>  INSN_sthux,
+        2#0_01100_10111#  =>  INSN_sthx,
+        2#0_10100_10110#  =>  INSN_stwbrx,
+        2#0_11100_10101#  =>  INSN_stwcix,
+        2#0_00100_10110#  =>  INSN_stwcx,
+        2#0_00101_10111#  =>  INSN_stwux,
+        2#0_00100_10111#  =>  INSN_stwx,
+        2#0_00001_01000#  =>  INSN_subf,
+        2#0_10001_01000#  =>  INSN_subf, -- subfo
+        2#0_00000_01000#  =>  INSN_subfc,
+        2#0_10000_01000#  =>  INSN_subfc, -- subfco
+        2#0_00100_01000#  =>  INSN_subfe,
+        2#0_10100_01000#  =>  INSN_subfe, -- subfeo
+        2#0_00111_01000#  =>  INSN_subfme,
+        2#0_10111_01000#  =>  INSN_subfme, -- subfmeo
+        2#0_00110_01000#  =>  INSN_subfze,
+        2#0_10110_01000#  =>  INSN_subfze, -- subfzeo
+        2#0_10010_10110#  =>  INSN_sync,
+        2#0_00010_00100#  =>  INSN_td,
+        2#0_00000_00100#  =>  INSN_tw,
+        2#0_01001_10010#  =>  INSN_tlbie,
+        2#0_01000_10010#  =>  INSN_tlbiel,
+        2#0_10001_10110#  =>  INSN_tlbsync,
+        2#0_00000_11110#  =>  INSN_wait,
+        2#0_01001_11100#  =>  INSN_xor,
+
+        -- Major opcode 19
+        -- Columns with insn(4) = '1' are all illegal and not mapped here; to
+        -- fit into 2048 entries, the remaining columns are remapped so that
+        -- 0-7 are stored here as 16-23 and 16-23 as 24-31; in other words the
+        -- address bits are 1, insn(10..6), 1, insn(5), insn(3..1)
+        2#1_10000_11000#  =>  INSN_bcctr,
+        2#1_00000_11000#  =>  INSN_bclr,
+        2#1_10001_11000#  =>  INSN_bctar,
+        2#1_01000_10001#  =>  INSN_crand,
+        2#1_00100_10001#  =>  INSN_crandc,
+        2#1_01001_10001#  =>  INSN_creqv,
+        2#1_00111_10001#  =>  INSN_crnand,
+        2#1_00001_10001#  =>  INSN_crnor,
+        2#1_01110_10001#  =>  INSN_cror,
+        2#1_01101_10001#  =>  INSN_crorc,
+        2#1_00110_10001#  =>  INSN_crxor,
+        2#1_00100_11110#  =>  INSN_isync,
+        2#1_00000_10000#  =>  INSN_mcrf,
+        2#1_00000_11010#  =>  INSN_rfid,
+
+        -- Major opcode 59
+        -- Only column 14 is valid here; columns 16-31 are handled in the major table
+        -- Column 14 is mapped to column 6 of the space which is
+        -- mostly used for opcode 19.
+        2#1_11010_10110#  =>  INSN_fcfids,
+        2#1_11110_10110#  =>  INSN_fcfidus,
+
+        -- Major opcode 63
+        -- Columns 0-15 are mapped here; columns 16-31 are in the major table.
+        -- Address bits are 1, insn(10:6), 0, insn(4:1)
+        2#1_00000_00000#  =>  INSN_fcmpu,
+        2#1_00001_00000#  =>  INSN_fcmpo,
+        2#1_00010_00000#  =>  INSN_mcrfs,
+        2#1_00100_00000#  =>  INSN_ftdiv,
+        2#1_00101_00000#  =>  INSN_ftsqrt,
+        2#1_00001_00110#  =>  INSN_mtfsb,
+        2#1_00010_00110#  =>  INSN_mtfsb,
+        2#1_00100_00110#  =>  INSN_mtfsfi,
+        2#1_11010_00110#  =>  INSN_fmrgow,
+        2#1_11110_00110#  =>  INSN_fmrgew,
+        2#1_10010_00111#  =>  INSN_mffs,
+        2#1_10110_00111#  =>  INSN_mtfsf,
+        2#1_00000_01000#  =>  INSN_fcpsgn,
+        2#1_00001_01000#  =>  INSN_fneg,
+        2#1_00010_01000#  =>  INSN_fmr,
+        2#1_00100_01000#  =>  INSN_fnabs,
+        2#1_01000_01000#  =>  INSN_fabs,
+        2#1_01100_01000#  =>  INSN_frin,
+        2#1_01101_01000#  =>  INSN_friz,
+        2#1_01110_01000#  =>  INSN_frip,
+        2#1_01111_01000#  =>  INSN_frim,
+        2#1_00000_01100#  =>  INSN_frsp,
+        2#1_00000_01110#  =>  INSN_fctiw,
+        2#1_00100_01110#  =>  INSN_fctiwu,
+        2#1_11001_01110#  =>  INSN_fctid,
+        2#1_11010_01110#  =>  INSN_fcfid,
+        2#1_11101_01110#  =>  INSN_fctidu,
+        2#1_11110_01110#  =>  INSN_fcfidu,
+        2#1_00000_01111#  =>  INSN_fctiwz,
+        2#1_00100_01111#  =>  INSN_fctiwuz,
+        2#1_11001_01111#  =>  INSN_fctidz,
+        2#1_11101_01111#  =>  INSN_fctiduz,
+
+        others            =>  INSN_illegal
+        );
+
+    -- Width in bits of one predecoded entry as packed into icodes_out:
+    -- an ICODE_LEN-bit field stacked on top of an IMAGE_LEN-bit field.
+    -- (ICODE_LEN and IMAGE_LEN are declared outside this section.)
+    constant IOUT_LEN : natural := ICODE_LEN + IMAGE_LEN;
+
+    -- Per-instruction state registered by predecode_0 and consumed by
+    -- predecode_1:
+    --   image         : the raw 32-bit instruction word
+    --   maj_predecode : result of the major_predecode_rom lookup
+    --   row_predecode : result of the row_predecode_rom lookup
+    type predec_t is record
+        image         : std_ulogic_vector(31 downto 0);
+        maj_predecode : insn_code;
+        row_predecode : insn_code;
+    end record;
+
+    -- One slot per instruction handled in parallel; WIDTH is declared
+    -- outside this section (presumably an entity generic -- confirm).
+    subtype index_t is integer range 0 to WIDTH-1;
+    type predec_array is array(index_t) of predec_t;
+
+    -- Pipeline register between the two predecode processes: written on
+    -- the rising clock edge in predecode_0, read combinationally in
+    -- predecode_1.
+    signal pred : predec_array;
+
+begin
+    -- First predecode stage (clocked).  For each of the WIDTH instruction
+    -- slots, look the instruction word up in both predecode ROMs and latch
+    -- the word together with the two results in pred.  When valid_in is
+    -- not '1', every slot is latched as an all-zero word with both results
+    -- set to INSN_illegal.
+    predecode_0: process(clk)
+        variable insn     : std_ulogic_vector(31 downto 0);
+        variable maj_idx  : std_ulogic_vector(10 downto 0);
+        variable row_idx  : std_ulogic_vector(10 downto 0);
+        variable col_hi   : std_ulogic_vector(1 downto 0);
+    begin
+        if rising_edge(clk) then
+            if valid_in = '1' then
+                for i in index_t loop
+                    insn := insns_in(i * 32 + 31 downto i * 32);
+
+                    -- Major table index: primary opcode, then insn(4..0)
+                    maj_idx := insn(31 downto 26) & insn(4 downto 0);
+
+                    -- Upper two column bits of the row table index
+                    if insn(28) = '0' then
+                        -- op 19 and op 59
+                        col_hi := '1' & insn(5);
+                    else
+                        -- op 31 and 63; for 63 this is only used when
+                        -- insn(5) = '0'
+                        col_hi := insn(5 downto 4);
+                    end if;
+
+                    -- row_predecode_rom serves op 19, 31, 59 and 63; the top
+                    -- index bit is 0 for op 31 and 1 for 19, 59, 63
+                    row_idx := (insn(31) or not insn(29))
+                               & insn(10 downto 6)
+                               & col_hi
+                               & insn(3 downto 1);
+
+                    pred(i) <= (image         => insn,
+                                maj_predecode => major_predecode_rom(to_integer(unsigned(maj_idx))),
+                                row_predecode => row_predecode_rom(to_integer(unsigned(row_idx))));
+                end loop;
+            else
+                for i in index_t loop
+                    pred(i) <= (image         => (others => '0'),
+                                maj_predecode => INSN_illegal,
+                                row_predecode => INSN_illegal);
+                end loop;
+            end if;
+        end if;
+    end process;
+
+    -- Second predecode stage (combinational).  For each slot in pred, pick
+    -- between the major-table and row-table results according to the
+    -- primary opcode, decide whether the instruction is illegal, and pack
+    -- the outcome into an IOUT_LEN-bit entry of icodes_out:
+    --   illegal : top bit '1', bits 31..0 hold the full instruction word,
+    --             any bits in between are '0'
+    --   legal   : the top ICODE_LEN bits hold the insn_code position, the
+    --             bits below IMAGE_LEN hold the low bits of the word
+    predecode_1: process(all)
+        variable insn     : std_ulogic_vector(31 downto 0);
+        variable from_row : std_ulogic;
+        variable bad      : std_ulogic;
+        variable entry    : std_ulogic_vector(IOUT_LEN - 1 downto 0);
+        variable code     : insn_code;
+    begin
+        for i in index_t loop
+            insn := pred(i).image;
+
+            -- Defaults: take the major-table result and assume legal
+            code := pred(i).maj_predecode;
+            from_row := '0';
+            bad := '0';
+
+            case insn(31 downto 26) is
+                when "000100" => -- 4
+                    -- Mostly VMX/VSX, but also some integer ops (madd*);
+                    -- anything with insn(5) = '0' is treated as illegal
+                    bad := not insn(5);
+
+                when "010011" => -- 19
+                    -- With insn(5..1) as the column number, columns 8-15
+                    -- and 24-31 hold no valid instructions.  addpcis
+                    -- (column 2) comes from the major table.  The other
+                    -- valid columns live in the second half of the row
+                    -- table: 0-1 are stored as 16-17, 16-23 as 24-31.
+                    bad := insn(4);
+                    from_row := insn(5) or not (insn(3) or insn(2));
+
+                when "011000" => -- 24
+                    -- ori: recognise the standard NOP encoding
+                    if std_match(insn, "01100000000000000000000000000000") then
+                        code := INSN_nop;
+                    end if;
+
+                when "011111" => -- 31
+                    -- Every column comes from the first half of the row table
+                    from_row := '1';
+
+                when "111011" => -- 59
+                    -- Floating point, mostly single-precision.  Columns
+                    -- 0-11 are illegal; columns 12-15 are stored as
+                    -- columns 20-23 in the second half of the row table;
+                    -- columns 16-31 come from the major table.
+                    bad := not insn(5) and not (insn(4) and insn(3));
+                    from_row := not insn(5);
+
+                when "111111" => -- 63
+                    -- Floating point, general and double-precision.
+                    -- Columns 0-15 use columns 0-15 of the second half of
+                    -- the row table; columns 16-31 use the major table.
+                    from_row := not insn(5);
+
+                when others =>
+                    null;
+            end case;
+
+            if from_row = '1' then
+                code := pred(i).row_predecode;
+            end if;
+
+            -- Without an FPU, floating-point instructions are illegal
+            if not HAS_FPU then
+                if code >= INSN_first_frs then
+                    bad := '1';
+                end if;
+            end if;
+
+            -- A table miss is illegal too
+            if code = INSN_illegal then
+                bad := '1';
+            end if;
+
+            -- Start from the zero-extended instruction word
+            entry := (others => '0');
+            entry(31 downto 0) := insn;
+            if bad = '1' then
+                -- Since an insn_code currently fits in 9 bits, the most
+                -- significant bit alone is enough to flag illegal insns.
+                entry(entry'high) := '1';
+            else
+                -- Replace everything from bit IMAGE_LEN upwards with the
+                -- instruction code
+                entry(entry'high downto IMAGE_LEN) :=
+                    std_ulogic_vector(to_unsigned(insn_code'pos(code), ICODE_LEN));
+            end if;
+            icodes_out((i + 1) * IOUT_LEN - 1 downto i * IOUT_LEN) <= entry;
+        end loop;
+    end process;
+
+end architecture behaviour;