From: Paul Mackerras Date: Sat, 9 Sep 2023 12:14:03 +0000 (+1000) Subject: Implement byte reversal instructions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=b50170cd1d158ed6ae19e98d9b4cca022b6b2c2f;p=microwatt.git Implement byte reversal instructions This implements the byte-reverse halfword, word and doubleword instructions: brh, brw, and brd. These instructions were added to the ISA in version 3.1. They use a new OP_BREV insn_type value. The logic for these instructions is implemented in logical.vhdl. In order to avoid going over 64 insn_type values, OP_AND and OP_OR were combined into OP_LOGIC, which is like OP_AND except that the RS input can be inverted as well as the RB input. The various forms of OR instruction are then implemented using the identity: a OR b = NOT (NOT a AND NOT b). The 'is_signed' field of the instruction decode table is used to indicate that RS should be inverted. Signed-off-by: Paul Mackerras --- diff --git a/decode1.vhdl b/decode1.vhdl index 0aa2fee..e090d66 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -89,10 +89,10 @@ architecture behaviour of decode1 is INSN_addme => (ALU, NONE, OP_ADD, RA, CONST_M1, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE), INSN_addpcis => (ALU, NONE, OP_ADD, CIA, CONST_DXHI4, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_addze => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '0', '0', CA, '1', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE), - INSN_and => (ALU, NONE, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), - INSN_andc => (ALU, NONE, OP_AND, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), - INSN_andi_dot => (ALU, NONE, OP_AND, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), - 
INSN_andis_dot => (ALU, NONE, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), + INSN_and => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), + INSN_andc => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), + INSN_andi_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), + INSN_andis_dot => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0', NONE), INSN_attn => (ALU, NONE, OP_ATTN, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1', NONE), INSN_b => (ALU, NONE, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), INSN_bc => (ALU, NONE, OP_BC, NONE, CONST_BD, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), @@ -100,6 +100,9 @@ architecture behaviour of decode1 is INSN_bclr => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), INSN_bctar => (ALU, NONE, OP_BCREG, NONE, NONE, NONE, NONE, '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0', NONE), INSN_bperm => (ALU, NONE, OP_BPERM, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_brh => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_brw => (ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is4B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_brd => 
(ALU, NONE, OP_BREV, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cbcdtd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cdtbcd => (ALU, NONE, OP_BCD, NONE, NONE, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_cmp => (ALU, NONE, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), @@ -268,14 +271,14 @@ architecture behaviour of decode1 is INSN_mulld => (ALU, NONE, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RCOE, '0', '0', NONE), INSN_mulli => (ALU, NONE, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_mullw => (ALU, NONE, OP_MUL_L64, RA, RB, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '1', RCOE, '0', '0', NONE), - INSN_nand => (ALU, NONE, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), + INSN_nand => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), INSN_neg => (ALU, NONE, OP_ADD, RA, NONE, NONE, RT, '0', '0', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', RCOE, '0', '0', NONE), INSN_nop => (ALU, NONE, OP_NOP, NONE, NONE, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), - INSN_nor => (ALU, NONE, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), - INSN_or => (ALU, NONE, OP_OR, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), - INSN_orc => (ALU, NONE, OP_OR, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', 
NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0', NONE), - INSN_ori => (ALU, NONE, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), - INSN_oris => (ALU, NONE, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), + INSN_nor => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), + INSN_or => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), + INSN_orc => (ALU, NONE, OP_LOGIC, NONE, RB, RS, RA, '0', '0', '0', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '0', NONE), + INSN_ori => (ALU, NONE, OP_LOGIC, NONE, CONST_UI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), + INSN_oris => (ALU, NONE, OP_LOGIC, NONE, CONST_UI_HI, RS, RA, '0', '0', '1', '1', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0', NONE), INSN_paddi => (ALU, NONE, OP_ADD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_plbz => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), INSN_pld => (LDST, NONE, OP_LOAD, RA0_OR_CIA, CONST_PSI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is8B, '0', '0', '0', '0', '0', '0', NONE, '0', '0', NONE), diff --git a/decode2.vhdl b/decode2.vhdl index 338a80a..1f3e7ff 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -205,13 +205,13 @@ architecture behaviour of decode2 is type mux_select_array_t is array(insn_type_t) of std_ulogic_vector(2 downto 0); constant result_select : mux_select_array_t := ( - OP_AND => "001", -- logical_result - OP_OR => "001", + OP_LOGIC => "001", -- logical_result OP_XOR => "001", OP_PRTY => "001", OP_CMPB => "001", 
OP_EXTS => "001", OP_BPERM => "001", + OP_BREV => "001", OP_BCD => "001", OP_MTSPR => "001", OP_RLC => "010", -- rotator_result diff --git a/decode_types.vhdl b/decode_types.vhdl index 428d943..9e7ef84 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -3,8 +3,9 @@ use ieee.std_logic_1164.all; package decode_types is type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD, - OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG, - OP_BCD, OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, + OP_ATTN, OP_B, OP_BC, OP_BCREG, + OP_BCD, OP_BPERM, OP_BREV, + OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_CNTZ, OP_CROP, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, OP_DCBZ, OP_ICBI, OP_ICBT, @@ -12,10 +13,11 @@ package decode_types is OP_DIV, OP_DIVE, OP_MOD, OP_EXTS, OP_EXTSWSLI, OP_ISEL, OP_ISYNC, + OP_LOGIC, OP_LOAD, OP_STORE, OP_MCRXRX, OP_MFCR, OP_MFMSR, OP_MFSPR, OP_MTCRF, OP_MTMSRD, OP_MTSPR, OP_MUL_L64, - OP_MUL_H64, OP_MUL_H32, OP_OR, + OP_MUL_H64, OP_MUL_H32, OP_POPCNT, OP_PRTY, OP_RFID, OP_RLC, OP_RLCL, OP_RLCR, OP_SC, OP_SETB, OP_SHL, OP_SHR, @@ -50,84 +52,84 @@ package decode_types is INSN_bcctr, INSN_bclr, INSN_bctar, + INSN_brh, + INSN_brw, + INSN_brd, -- 20 INSN_cbcdtd, INSN_cdtbcd, - INSN_cmpi, -- 20 + INSN_cmpi, INSN_cmpli, INSN_cntlzw, INSN_cntlzd, INSN_cnttzw, INSN_cnttzd, INSN_crand, - INSN_crandc, + INSN_crandc, -- 30 INSN_creqv, INSN_crnand, - INSN_crnor, -- 30 + INSN_crnor, INSN_cror, INSN_crorc, INSN_crxor, INSN_darn, INSN_eieio, INSN_extsb, - INSN_extsh, + INSN_extsh, -- 40 INSN_extsw, INSN_extswsli, - INSN_isync, -- 40 + INSN_isync, INSN_lbzu, INSN_ld, INSN_ldu, INSN_lhau, INSN_lwa, INSN_lwzu, - INSN_mcrf, + INSN_mcrf, -- 50 INSN_mcrxrx, INSN_mfcr, - INSN_mfmsr, -- 50 + INSN_mfmsr, INSN_mfspr, INSN_mtcrf, INSN_mtmsr, INSN_mtmsrd, INSN_mtspr, INSN_mulli, - INSN_neg, + INSN_neg, -- 60 INSN_nop, INSN_ori, - INSN_oris, -- 60 + INSN_oris, INSN_popcntb, INSN_popcntw, INSN_popcntd, INSN_prtyw, INSN_prtyd, INSN_rfid, - INSN_rldic, + INSN_rldic, -- 70 INSN_rldicl, INSN_rldicr, 
- INSN_rldimi, -- 70 + INSN_rldimi, INSN_rlwimi, INSN_rlwinm, INSN_sc, INSN_setb, INSN_slbia, INSN_sradi, - INSN_srawi, + INSN_srawi, -- 80 INSN_stbu, INSN_std, - INSN_stdu, -- 80 + INSN_stdu, INSN_sthu, INSN_stwu, INSN_subfic, INSN_subfme, INSN_subfze, INSN_sync, - INSN_tdi, + INSN_tdi, -- 90 INSN_tlbsync, INSN_twi, - INSN_wait, -- 90 + INSN_wait, INSN_xori, INSN_xoris, - INSN_93, -- padding - INSN_94, - INSN_95, -- Non-prefixed instructions that have a MLS:D prefixed form and -- their corresponding prefixed instructions. diff --git a/execute1.vhdl b/execute1.vhdl index e537048..7c1ff8f 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -392,6 +392,7 @@ begin op => e_in.insn_type, invert_in => e_in.invert_a, invert_out => e_in.invert_out, + is_signed => e_in.is_signed, result => logical_result, datalen => e_in.data_len ); @@ -1105,8 +1106,8 @@ begin when OP_ADDG6S => when OP_CMPRB => when OP_CMPEQB => - when OP_AND | OP_OR | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS | - OP_BPERM | OP_BCD => + when OP_LOGIC | OP_XOR | OP_PRTY | OP_CMPB | OP_EXTS | + OP_BPERM | OP_BREV | OP_BCD => when OP_B => v.take_branch := '1'; diff --git a/logical.vhdl b/logical.vhdl index 77ef29c..2d139f8 100644 --- a/logical.vhdl +++ b/logical.vhdl @@ -13,6 +13,7 @@ entity logical is op : in insn_type_t; invert_in : in std_ulogic; invert_out : in std_ulogic; + is_signed : in std_ulogic; result : out std_ulogic_vector(63 downto 0); datalen : in std_logic_vector(3 downto 0) ); @@ -92,7 +93,8 @@ architecture behaviour of logical is begin logical_0: process(all) - variable rb_adj, tmp : std_ulogic_vector(63 downto 0); + variable rb_adj, rs_adj : std_ulogic_vector(63 downto 0); + variable tmp : std_ulogic_vector(63 downto 0); variable negative : std_ulogic; variable j : integer; begin @@ -123,19 +125,34 @@ begin end if; case op is - when OP_AND | OP_OR | OP_XOR => - case op is - when OP_AND => - tmp := rs and rb_adj; - when OP_OR => - tmp := rs or rb_adj; - when others => - tmp := rs xor rb_adj; - end 
case; + when OP_LOGIC => + -- for now, abuse the 'is_signed' field to indicate inversion of RS + rs_adj := rs; + if is_signed = '1' then + rs_adj := not rs; + end if; + tmp := rs_adj and rb_adj; + if invert_out = '1' then + tmp := not tmp; + end if; + when OP_XOR => + tmp := rs xor rb; if invert_out = '1' then tmp := not tmp; end if; + when OP_BREV => + if datalen(3) = '1' then + tmp := rs( 7 downto 0) & rs(15 downto 8) & rs(23 downto 16) & rs(31 downto 24) & + rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56); + elsif datalen(2) = '1' then + tmp := rs(39 downto 32) & rs(47 downto 40) & rs(55 downto 48) & rs(63 downto 56) & + rs( 7 downto 0) & rs(15 downto 8) & rs(23 downto 16) & rs(31 downto 24); + else + tmp := rs(55 downto 48) & rs(63 downto 56) & rs(39 downto 32) & rs(47 downto 40) & + rs(23 downto 16) & rs(31 downto 24) & rs( 7 downto 0) & rs(15 downto 8); + end if; + when OP_PRTY => tmp := parity; when OP_CMPB => diff --git a/predecode.vhdl b/predecode.vhdl index 58b17e3..27f80e1 100644 --- a/predecode.vhdl +++ b/predecode.vhdl @@ -184,6 +184,9 @@ architecture behaviour of predecoder is 2#0_00000_11100# => INSN_and, 2#0_00001_11100# => INSN_andc, 2#0_00111_11100# => INSN_bperm, + 2#0_00110_11011# => INSN_brh, + 2#0_00100_11011# => INSN_brw, + 2#0_00101_11011# => INSN_brd, 2#0_01001_11010# => INSN_cbcdtd, 2#0_01000_11010# => INSN_cdtbcd, 2#0_00000_00000# => INSN_cmp,