From f7c393ba7e73787a77c444bba6c9eed329ff3b32 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 7 Oct 2019 18:26:11 +1100 Subject: [PATCH] Add a rotate/mask/shift unit and use it in execute1 This adds a new entity 'rotator' which contains combinatorial logic for rotating and masking 64-bit values. It implements the operations of the rlwinm, rlwnm, rlwimi, rldicl, rldicr, rldic, rldimi, rldcl, rldcr, sld, slw, srd, srw, srad, sradi, sraw and srawi instructions. It consists of a 3-stage 64-bit rotator using 4:1 multiplexors at each stage, two mask generators, output logic and control logic. The insn_type_t values used for these instructions have been reduced to just 5: OP_RLC, OP_RLCL and OP_RLCR for the rotate and mask instructions (clear both left and right, clear left, clear right variants), OP_SHL for left shifts, and OP_SHR for right shifts. The control signals for the rotator are derived from the opcode and from the is_32bit and is_signed fields of the decode_rom_t. The rotator is instantiated as an entity in execute1 so that we can be sure we only have one of it. Signed-off-by: Paul Mackerras --- Makefile | 10 +- decode1.vhdl | 45 ++++---- decode2.vhdl | 6 +- decode_types.vhdl | 7 +- execute1.vhdl | 109 ++++++------------- microwatt.core | 1 + rotator.vhdl | 183 +++++++++++++++++++++++++++++++ rotator_tb.vhdl | 269 ++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 523 insertions(+), 107 deletions(-) create mode 100644 rotator.vhdl create mode 100644 rotator_tb.vhdl diff --git a/Makefile b/Makefile index 8075439..7b28b31 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,8 @@ GHDL=ghdl GHDLFLAGS=--std=08 -Psim-unisim CFLAGS=-O2 -Wall -all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb dmi_dtm_tb divider_tb +all = core_tb simple_ram_behavioural_tb soc_reset_tb icache_tb multiply_tb dmi_dtm_tb divider_tb \ + rotator_tb # XXX # loadstore_tb fetch_tb @@ -22,7 +23,7 @@ crhelpers.o: common.o decode1.o: common.o decode_types.o decode2.o: decode_types.o common.o helpers.o insn_helpers.o decode_types.o: -execute1.o: decode_types.o common.o helpers.o crhelpers.o ppc_fx_insns.o insn_helpers.o +execute1.o: decode_types.o common.o helpers.o crhelpers.o ppc_fx_insns.o insn_helpers.o rotator.o execute2.o: common.o crhelpers.o ppc_fx_insns.o fetch1.o: common.o fetch2.o: common.o wishbone_types.o @@ -40,6 +41,8 @@ divider_tb.o: decode_types.o common.o glibc_random.o ppc_fx_insns.o divider.o divider.o: common.o decode_types.o crhelpers.o ppc_fx_insns.o: helpers.o register_file.o: common.o +rotator.o: common.o +rotator_tb.o: common.o glibc_random.o ppc_fx_insns.o insn_helpers.o rotator.o sim_console.o: simple_ram_behavioural_helpers.o: simple_ram_behavioural_tb.o: wishbone_types.o simple_ram_behavioural.o @@ -81,6 +84,9 @@ multiply_tb: multiply_tb.o divider_tb: divider_tb.o $(GHDL) -e $(GHDLFLAGS) $@ +rotator_tb: rotator_tb.o + $(GHDL) -e $(GHDLFLAGS) $@ + simple_ram_tb: simple_ram_tb.o $(GHDL) -e $(GHDLFLAGS) $@ diff --git a/decode1.vhdl b/decode1.vhdl index 275c45e..8ad123f 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -26,7 +26,7 @@ architecture behaviour of decode1 is type major_rom_array_t is array(0 to 63) of decode_rom_t; type minor_valid_array_t is array(0 to 1023) of std_ulogic; type op_19_subop_array_t is array(0 to 7) of decode_rom_t; - type op_30_subop_array_t is array(0 to 7) of decode_rom_t; + type op_30_subop_array_t is array(0 to 15) of decode_rom_t; type op_31_subop_array_t is array(0 to 1023) of decode_rom_t; type minor_rom_array_2_t is array(0 to 3) of decode_rom_t; @@ -57,9 +57,9 @@ architecture behaviour of decode1 is 7 => (MUL, OP_MUL_L64, RA, CONST_SI, NONE, RT, '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '1'), -- mulli 24 => (ALU, OP_OR, NONE, CONST_UI, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- ori 25 => (ALU, OP_OR, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- oris - 20 => (ALU, OP_RLWIMI, RA, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rlwimi - 21 => (ALU, OP_RLWINM, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rlwinm - 23 => (ALU, OP_RLWNM, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rlwnm + 20 => (ALU, OP_RLC, RA, CONST_SH32, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- rlwimi + 21 => (ALU, OP_RLC, NONE, CONST_SH32, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- rlwinm + 23 => (ALU, OP_RLC, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- rlwnm 38 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- stb 39 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', RC, '0', '1'), -- stbu 44 => (LDST, OP_STORE, RA_OR_ZERO, CONST_SI, RS, NONE, '0', '0', '0', ZERO, '0', is2B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- sth @@ -115,12 +115,16 @@ architecture behaviour of decode1 is constant decode_op_30_array : op_30_subop_array_t := ( -- unit internal in1 in2 in3 out CR CR inv cry cry ldst BR sgn upd rsrv 32b sgn rc lk sgl -- op in out A in out len ext pipe - 2#010# => (ALU, OP_RLDIC, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), - 2#000# => (ALU, OP_RLDICL, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), - 2#001# => (ALU, OP_RLDICR, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), - 2#011# => (ALU, OP_RLDIMI, RA, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), - -- rldcl, rldcr - 2#100# => (ALU, OP_RLDCX, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), + 2#0100# => (ALU, OP_RLC, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldic + 2#0101# => (ALU, OP_RLC, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldic + 2#0000# => (ALU, OP_RLCL, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicl + 2#0001# => (ALU, OP_RLCL, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicl + 2#0010# => (ALU, OP_RLCR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicr + 2#0011# => (ALU, OP_RLCR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldicr + 2#0110# => (ALU, OP_RLC, RA, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldimi + 2#0111# => (ALU, OP_RLC, RA, CONST_SH, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldimi + 2#1000# => (ALU, OP_RLCL, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldcl + 2#1001# => (ALU, OP_RLCR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- rldcr others => illegal_inst ); @@ -249,15 +253,15 @@ architecture behaviour of decode1 is 2#0010111010# => (ALU, OP_PRTYD, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- prtyd 2#0010011010# => (ALU, OP_PRTYW, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- prtyw -- 2#0010000000# setb - 2#0000011011# => (ALU, OP_SLD, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sld - 2#0000011000# => (ALU, OP_SLW, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- slw - 2#1100011010# => (ALU, OP_SRAD, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srad - 2#1100111010# => (ALU, OP_SRADI, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sradi - 2#1100111011# => (ALU, OP_SRADI, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sradi - 2#1100011000# => (ALU, OP_SRAW, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sraw - 2#1100111000# => (ALU, OP_SRAWI, NONE, NONE, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srawi - 2#1000011011# => (ALU, OP_SRD, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srd - 2#1000011000# => (ALU, OP_SRW, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srw + 2#0000011011# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- sld + 2#0000011000# => (ALU, OP_SHL, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- slw + 2#1100011010# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- srad + 2#1100111010# => (ALU, OP_SHR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- sradi + 2#1100111011# => (ALU, OP_SHR, NONE, CONST_SH, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '0', '1', RC, '0', '1'), -- sradi + 2#1100011000# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- sraw + 2#1100111000# => (ALU, OP_SHR, NONE, CONST_SH32, RS, RA, '0', '0', '0', ZERO, '1', NONE, '0', '0', '0', '0', '1', '1', RC, '0', '1'), -- srawi + 2#1000011011# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- srd + 2#1000011000# => (ALU, OP_SHR, NONE, RB, RS, RA, '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '1'), -- srw 2#1010110110# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '1', '0', '0', RC, '0', '1'), -- stbcx 2#0011110111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', RC, '0', '1'), -- stbux 2#0011010111# => (LDST, OP_STORE, RA_OR_ZERO, RB, RS, NONE, '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', RC, '0', '1'), -- stbx @@ -323,7 +327,6 @@ begin variable v : Decode1ToDecode2Type; variable majorop : major_opcode_t; variable op_19_bits: std_ulogic_vector(2 downto 0); - variable op_30_bits: std_ulogic_vector(2 downto 0); begin v := r; @@ -352,7 +355,7 @@ begin end if; elsif majorop = "011110" then - v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 2)))); + v.decode := decode_op_30_array(to_integer(unsigned(f_in.insn(4 downto 1)))); elsif majorop = "111010" then v.decode := decode_op_58_array(to_integer(unsigned(f_in.insn(1 downto 0)))); diff --git a/decode2.vhdl b/decode2.vhdl index 7c6c0ca..c0afcc2 100644 --- a/decode2.vhdl +++ b/decode2.vhdl @@ -90,7 +90,11 @@ architecture behaviour of decode2 is when CONST_DS => return ('0', (others => '0'), std_ulogic_vector(resize(signed(insn_ds(insn_in)) & "00", 64))); when CONST_M1 => - return ('0', (others => '0'), x"FFFFFFFFFFFFFFFF"); + return ('0', (others => '0'), x"FFFFFFFFFFFFFFFF"); + when CONST_SH => + return ('0', (others => '0'), x"00000000000000" & "00" & insn_in(1) & insn_in(15 downto 11)); + when CONST_SH32 => + return ('0', (others => '0'), x"00000000000000" & "000" & insn_in(15 downto 11)); when NONE => return ('0', (others => '0'), (others => '0')); end case; diff --git a/decode_types.vhdl b/decode_types.vhdl index 6ac91a5..54b17e7 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -15,14 +15,13 @@ package decode_types is OP_MTCRF, OP_MTSPR, OP_MUL_L64, OP_MUL_H64, OP_MUL_H32, OP_NAND, OP_NEG, OP_NOR, OP_OR, OP_ORC, OP_POPCNTB, OP_POPCNTD, OP_POPCNTW, OP_PRTYD, - OP_PRTYW, OP_RLDCX, OP_RLDIC, OP_RLDICL, OP_RLDICR, - OP_RLDIMI, OP_RLWIMI, OP_RLWINM, OP_RLWNM, OP_SETB, OP_SLD, - OP_SLW, OP_SRAD, OP_SRADI, OP_SRAW, OP_SRAWI, OP_SRD, OP_SRW, + OP_PRTYW, OP_RLC, OP_RLCL, OP_RLCR, OP_SETB, + OP_SHL, OP_SHR, OP_SYNC, OP_TD, OP_TDI, OP_TW, OP_TWI, OP_XOR, OP_SIM_CONFIG); type input_reg_a_t is (NONE, RA, RA_OR_ZERO); - type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1); + type input_reg_b_t is (NONE, RB, CONST_UI, CONST_SI, CONST_SI_HI, CONST_UI_HI, CONST_LI, CONST_BD, CONST_DS, CONST_M1, CONST_SH, CONST_SH32); type input_reg_c_t is (NONE, RS); type output_reg_a_t is (NONE, RT, RA); type rc_t is (NONE, ONE, RC); diff --git a/execute1.vhdl b/execute1.vhdl index cdd79f1..9f54bad 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -42,6 +42,10 @@ architecture behaviour of execute1 is signal ctrl: ctrl_t := (carry => '0', others => (others => '0')); signal ctrl_tmp: ctrl_t := (carry => '0', others => (others => '0')); + signal right_shift, rot_clear_left, rot_clear_right: std_ulogic; + signal rotator_result: std_ulogic_vector(63 downto 0); + signal rotator_carry: std_ulogic; + function decode_input_carry (carry_sel : carry_in_t; ca_in : std_ulogic) return std_ulogic is begin case carry_sel is @@ -54,6 +58,22 @@ architecture behaviour of execute1 is end case; end; begin + + rotator_0: entity work.rotator + port map ( + rs => e_in.read_data3, + ra => e_in.read_data1, + shift => e_in.read_data2(6 downto 0), + insn => e_in.insn, + is_32bit => e_in.is_32bit, + right_shift => right_shift, + arith => e_in.is_signed, + clear_left => rot_clear_left, + clear_right => rot_clear_right, + result => rotator_result, + carry_out => rotator_carry + ); + execute1_0: process(clk) begin if rising_edge(clk) then @@ -93,6 +113,11 @@ begin terminate_out <= '0'; f_out <= Execute1ToFetch1TypeInit; + -- rotator control signals + right_shift <= '1' when e_in.insn_type = OP_SHR else '0'; + rot_clear_left <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCL else '0'; + rot_clear_right <= '1' when e_in.insn_type = OP_RLC or e_in.insn_type = OP_RLCR else '0'; + if e_in.valid = '1' then v.e.valid := '1'; @@ -310,85 +335,11 @@ begin when OP_PRTYW => result := ppc_prtyw(e_in.read_data3); result_en := 1; - when OP_RLDCX => - -- note rldcl mb field and rldcr me field are in the same place - mb := insn_mb(e_in.insn); - if e_in.insn(1) = '0' then - result := ppc_rldcl(e_in.read_data3, e_in.read_data2, mb); - else - result := ppc_rldcr(e_in.read_data3, e_in.read_data2, mb); - end if; - result_en := 1; - when OP_RLDICL => - sh := insn_sh(e_in.insn); - mb := insn_mb(e_in.insn); - result := ppc_rldicl(e_in.read_data3, sh, mb); - result_en := 1; - when OP_RLDICR => - sh := insn_sh(e_in.insn); - me := insn_me(e_in.insn); - result := ppc_rldicr(e_in.read_data3, sh, me); - result_en := 1; - when OP_RLWNM => - mb32 := insn_mb32(e_in.insn); - me32 := insn_me32(e_in.insn); - result := ppc_rlwnm(e_in.read_data3, e_in.read_data2, mb32, me32); - result_en := 1; - when OP_RLWINM => - sh32 := insn_sh32(e_in.insn); - mb32 := insn_mb32(e_in.insn); - me32 := insn_me32(e_in.insn); - result := ppc_rlwinm(e_in.read_data3, sh32, mb32, me32); - result_en := 1; - when OP_RLDIC => - sh := insn_sh(e_in.insn); - mb := insn_mb(e_in.insn); - result := ppc_rldic(e_in.read_data3, sh, mb); - result_en := 1; - when OP_RLDIMI => - sh := insn_sh(e_in.insn); - mb := insn_mb(e_in.insn); - result := ppc_rldimi(e_in.read_data1, e_in.read_data3, sh, mb); - result_en := 1; - when OP_RLWIMI => - sh32 := insn_sh32(e_in.insn); - mb32 := insn_mb32(e_in.insn); - me32 := insn_me32(e_in.insn); - result := ppc_rlwimi(e_in.read_data1, e_in.read_data3, sh32, mb32, me32); - result_en := 1; - when OP_SLD => - result := ppc_sld(e_in.read_data3, e_in.read_data2); - result_en := 1; - when OP_SLW => - result := ppc_slw(e_in.read_data3, e_in.read_data2); - result_en := 1; - when OP_SRAW => - result_with_carry := ppc_sraw(e_in.read_data3, e_in.read_data2); - result := result_with_carry(63 downto 0); - ctrl_tmp.carry <= result_with_carry(64); - result_en := 1; - when OP_SRAWI => - sh := '0' & insn_sh32(e_in.insn); - result_with_carry := ppc_srawi(e_in.read_data3, sh); - result := result_with_carry(63 downto 0); - ctrl_tmp.carry <= result_with_carry(64); - result_en := 1; - when OP_SRAD => - result_with_carry := ppc_srad(e_in.read_data3, e_in.read_data2); - result := result_with_carry(63 downto 0); - ctrl_tmp.carry <= result_with_carry(64); - result_en := 1; - when OP_SRADI => - sh := insn_sh(e_in.insn); - result_with_carry := ppc_sradi(e_in.read_data3, sh); - result := result_with_carry(63 downto 0); - ctrl_tmp.carry <= result_with_carry(64); - result_en := 1; - when OP_SRD => - result := ppc_srd(e_in.read_data3, e_in.read_data2); - result_en := 1; - when OP_SRW => - result := ppc_srw(e_in.read_data3, e_in.read_data2); + when OP_RLC | OP_RLCL | OP_RLCR | OP_SHL | OP_SHR => + result := rotator_result; + if e_in.output_carry = '1' then + ctrl_tmp.carry <= rotator_carry; + end if; result_en := 1; when OP_XOR => result := ppc_xor(e_in.read_data3, e_in.read_data2); diff --git a/microwatt.core b/microwatt.core index 94ff0c1..6143f50 100644 --- a/microwatt.core +++ b/microwatt.core @@ -24,6 +24,7 @@ filesets: - loadstore2.vhdl - multiply.vhdl - divider.vhdl + - rotator.vhdl - writeback.vhdl - insn_helpers.vhdl - core.vhdl diff --git a/rotator.vhdl b/rotator.vhdl new file mode 100644 index 0000000..d8a8ee9 --- /dev/null +++ b/rotator.vhdl @@ -0,0 +1,183 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.common.all; + +entity rotator is + port (rs: in std_ulogic_vector(63 downto 0); + ra: in std_ulogic_vector(63 downto 0); + shift: in std_ulogic_vector(6 downto 0); + insn: in std_ulogic_vector(31 downto 0); + is_32bit: in std_ulogic; + right_shift: in std_ulogic; + arith: in std_ulogic; + clear_left: in std_ulogic; + clear_right: in std_ulogic; + result: out std_ulogic_vector(63 downto 0); + carry_out: out std_ulogic + ); +end entity rotator; + +architecture behaviour of rotator is + signal repl32: std_ulogic_vector(63 downto 0); + signal rot_count: std_ulogic_vector(5 downto 0); + signal rot1, rot2, rot: std_ulogic_vector(63 downto 0); + signal sh, mb, me: std_ulogic_vector(6 downto 0); + signal mr, ml: std_ulogic_vector(63 downto 0); + signal output_mode: std_ulogic_vector(1 downto 0); + + -- note BE bit numbering + function right_mask(mask_begin: std_ulogic_vector(6 downto 0)) return std_ulogic_vector is + variable ret: std_ulogic_vector(63 downto 0); + begin + ret := (others => '0'); + for i in 0 to 63 loop + if i >= to_integer(unsigned(mask_begin)) then + ret(63 - i) := '1'; + end if; + end loop; + return ret; + end; + + function left_mask(mask_end: std_ulogic_vector(6 downto 0)) return std_ulogic_vector is + variable ret: std_ulogic_vector(63 downto 0); + begin + ret := (others => '0'); + if mask_end(6) = '0' then + for i in 0 to 63 loop + if i <= to_integer(unsigned(mask_end)) then + ret(63 - i) := '1'; + end if; + end loop; + end if; + return ret; + end; + +begin + rotator_0: process(all) + begin + -- First replicate bottom 32 bits to both halves if 32-bit + if is_32bit = '1' then + repl32 <= rs(31 downto 0) & rs(31 downto 0); + else + repl32 <= rs; + end if; + + -- Negate shift count for right shifts + if right_shift = '1' then + rot_count <= std_ulogic_vector(- signed(shift(5 downto 0))); + else + rot_count <= shift(5 downto 0); + end if; + + -- Rotator works in 3 stages using 2 bits of the rotate count each + -- time. This gives 4:1 multiplexors which is ideal for the 6-input + -- LUTs in the Xilinx Artix 7. + -- We look at the low bits of the rotate count first because they will + -- have less delay through the negation above. + -- First rotate by 0, 1, 2, or 3 + case rot_count(1 downto 0) is + when "00" => + rot1 <= repl32; + when "01" => + rot1 <= repl32(62 downto 0) & repl32(63); + when "10" => + rot1 <= repl32(61 downto 0) & repl32(63 downto 62); + when others => + rot1 <= repl32(60 downto 0) & repl32(63 downto 61); + end case; + -- Next rotate by 0, 4, 8 or 12 + case rot_count(3 downto 2) is + when "00" => + rot2 <= rot1; + when "01" => + rot2 <= rot1(59 downto 0) & rot1(63 downto 60); + when "10" => + rot2 <= rot1(55 downto 0) & rot1(63 downto 56); + when others => + rot2 <= rot1(51 downto 0) & rot1(63 downto 52); + end case; + -- Lastly rotate by 0, 16, 32 or 48 + case rot_count(5 downto 4) is + when "00" => + rot <= rot2; + when "01" => + rot <= rot2(47 downto 0) & rot2(63 downto 48); + when "10" => + rot <= rot2(31 downto 0) & rot2(63 downto 32); + when others => + rot <= rot2(15 downto 0) & rot2(63 downto 16); + end case; + + -- Trim shift count to 6 bits for 32-bit shifts + sh <= (shift(6) and not is_32bit) & shift(5 downto 0); + + -- Work out mask begin/end indexes (caution, big-endian bit numbering) + if clear_left = '1' then + if is_32bit = '1' then + mb <= "01" & insn(10 downto 6); + else + mb <= "0" & insn(5) & insn(10 downto 6); + end if; + elsif right_shift = '1' then + -- this is basically mb <= sh + (is_32bit? 32: 0); + if is_32bit = '1' then + mb <= sh(5) & not sh(5) & sh(4 downto 0); + else + mb <= sh; + end if; + else + mb <= ('0' & is_32bit & "00000"); + end if; + if clear_right = '1' and is_32bit = '1' then + me <= "01" & insn(5 downto 1); + elsif clear_right = '1' and clear_left = '0' then + me <= "0" & insn(5) & insn(10 downto 6); + else + -- effectively, 63 - sh + me <= sh(6) & not sh(5 downto 0); + end if; + + -- Calculate left and right masks + mr <= right_mask(mb); + ml <= left_mask(me); + + -- Work out output mode + -- 00 for sl[wd] + -- 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me + -- 10 for rldicl, sr[wd] + -- 1z for sra[wd][i], z = 1 if rs is negative + if (clear_left = '1' and clear_right = '0') or right_shift = '1' then + output_mode(1) <= '1'; + output_mode(0) <= arith and repl32(63); + else + output_mode(1) <= '0'; + if clear_right = '1' and unsigned(mb(5 downto 0)) > unsigned(me(5 downto 0)) then + output_mode(0) <= '1'; + else + output_mode(0) <= '0'; + end if; + end if; + + -- Generate output from rotated input and masks + case output_mode is + when "00" => + result <= (rot and (mr and ml)) or (ra and not (mr and ml)); + when "01" => + result <= (rot and (mr or ml)) or (ra and not (mr or ml)); + when "10" => + result <= rot and mr; + when others => + result <= rot or not mr; + end case; + + -- Generate carry output for arithmetic shift right of negative value + if output_mode = "11" then + carry_out <= or (rs and not ml); + else + carry_out <= '0'; + end if; + end process; +end behaviour; diff --git a/rotator_tb.vhdl b/rotator_tb.vhdl new file mode 100644 index 0000000..3cb46b0 --- /dev/null +++ b/rotator_tb.vhdl @@ -0,0 +1,269 @@ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.common.all; +use work.glibc_random.all; +use work.ppc_fx_insns.all; +use work.insn_helpers.all; + +entity rotator_tb is +end rotator_tb; + +architecture behave of rotator_tb is + constant clk_period: time := 10 ns; + signal ra, rs: std_ulogic_vector(63 downto 0); + signal shift: std_ulogic_vector(6 downto 0) := (others => '0'); + signal insn: std_ulogic_vector(31 downto 0) := (others => '0'); + signal is_32bit, right_shift, arith, clear_left, clear_right: std_ulogic := '0'; + signal result: std_ulogic_vector(63 downto 0); + signal carry_out: std_ulogic; + +begin + rotator_0: entity work.rotator + port map ( + rs => rs, + ra => ra, + shift => shift, + insn => insn, + is_32bit => is_32bit, + right_shift => right_shift, + arith => arith, + clear_left => clear_left, + clear_right => clear_right, + result => result, + carry_out => carry_out + ); + + stim_process: process + variable behave_ra: std_ulogic_vector(63 downto 0); + variable behave_ca_ra: std_ulogic_vector(64 downto 0); + begin + -- rlwinm, rlwnm + report "test rlw[i]nm"; + ra <= (others => '0'); + is_32bit <= '1'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '1'; + clear_right <= '1'; + rlwnm_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + insn <= x"00000" & '0' & pseudorand(10) & '0'; + wait for clk_period; + behave_ra := ppc_rlwinm(rs, shift(4 downto 0), insn_mb32(insn), insn_me32(insn)); + assert behave_ra = result + report "bad rlwnm expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- rlwimi + report "test rlwimi"; + is_32bit <= '1'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '1'; + clear_right <= '1'; + rlwimi_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + ra <= pseudorand(64); + shift <= "00" & pseudorand(5); + insn <= x"00000" & '0' & pseudorand(10) & '0'; + wait for clk_period; + behave_ra := ppc_rlwimi(ra, rs, shift(4 downto 0), insn_mb32(insn), insn_me32(insn)); + assert behave_ra = result + report "bad rlwimi expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- rldicl, rldcl + report "test rld[i]cl"; + ra <= (others => '0'); + is_32bit <= '0'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '1'; + clear_right <= '0'; + rldicl_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + insn <= x"00000" & '0' & pseudorand(10) & '0'; + wait for clk_period; + behave_ra := ppc_rldicl(rs, shift(5 downto 0), insn_mb(insn)); + assert behave_ra = result + report "bad rldicl expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- rldicr, rldcr + report "test rld[i]cr"; + ra <= (others => '0'); + is_32bit <= '0'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '0'; + clear_right <= '1'; + rldicr_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + insn <= x"00000" & '0' & pseudorand(10) & '0'; + wait for clk_period; + behave_ra := ppc_rldicr(rs, shift(5 downto 0), insn_me(insn)); + --report "rs = " & to_hstring(rs); + --report "ra = " & to_hstring(ra); + --report "shift = " & to_hstring(shift); + --report "insn me = " & to_hstring(insn_me(insn)); + --report "result = " & to_hstring(result); + assert behave_ra = result + report "bad rldicr expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- rldic + report "test rldic"; + ra <= (others => '0'); + is_32bit <= '0'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '1'; + clear_right <= '1'; + rldic_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= '0' & pseudorand(6); + insn <= x"00000" & '0' & pseudorand(10) & '0'; + wait for clk_period; + behave_ra := ppc_rldic(rs, shift(5 downto 0), insn_mb(insn)); + assert behave_ra = result + report "bad rldic expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- rldimi + report "test rldimi"; + is_32bit <= '0'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '1'; + clear_right <= '1'; + rldimi_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + ra <= pseudorand(64); + shift <= '0' & pseudorand(6); + insn <= x"00000" & '0' & pseudorand(10) & '0'; + wait for clk_period; + behave_ra := ppc_rldimi(ra, rs, shift(5 downto 0), insn_mb(insn)); + assert behave_ra = result + report "bad rldimi expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- slw + report "test slw"; + ra <= (others => '0'); + is_32bit <= '1'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '0'; + clear_right <= '0'; + slw_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + wait for clk_period; + behave_ra := ppc_slw(rs, std_ulogic_vector(resize(unsigned(shift), 64))); + assert behave_ra = result + report "bad slw expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- sld + report "test sld"; + ra <= (others => '0'); + is_32bit <= '0'; + right_shift <= '0'; + arith <= '0'; + clear_left <= '0'; + clear_right <= '0'; + sld_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + wait for clk_period; + behave_ra := ppc_sld(rs, std_ulogic_vector(resize(unsigned(shift), 64))); + assert behave_ra = result + report "bad sld expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- srw + report "test srw"; + ra <= (others => '0'); + is_32bit <= '1'; + right_shift <= '1'; + arith <= '0'; + clear_left <= '0'; + clear_right <= '0'; + srw_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + wait for clk_period; + behave_ra := ppc_srw(rs, std_ulogic_vector(resize(unsigned(shift), 64))); + assert behave_ra = result + report "bad srw expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- srd + report "test srd"; + ra <= (others => '0'); + is_32bit <= '0'; + right_shift <= '1'; + arith <= '0'; + clear_left <= '0'; + clear_right <= '0'; + srd_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + wait for clk_period; + behave_ra := ppc_srd(rs, std_ulogic_vector(resize(unsigned(shift), 64))); + assert behave_ra = result + report "bad srd expected " & to_hstring(behave_ra) & " got " & to_hstring(result); + end loop; + + -- sraw[i] + report "test sraw[i]"; + ra <= (others => '0'); + is_32bit <= '1'; + right_shift <= '1'; + arith <= '1'; + clear_left <= '0'; + clear_right <= '0'; + sraw_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= '0' & pseudorand(6); + wait for clk_period; + behave_ca_ra := ppc_sraw(rs, std_ulogic_vector(resize(unsigned(shift), 64))); + --report "rs = " & to_hstring(rs); + --report "ra = " & to_hstring(ra); + --report "shift = " & to_hstring(shift); + --report "result = " & to_hstring(carry_out & result); + assert behave_ca_ra(63 downto 0) = result and behave_ca_ra(64) = carry_out + report "bad sraw expected " & to_hstring(behave_ca_ra) & " got " & to_hstring(carry_out & result); + end loop; + + -- srad[i] + report "test srad[i]"; + ra <= (others => '0'); + is_32bit <= '0'; + right_shift <= '1'; + arith <= '1'; + clear_left <= '0'; + clear_right <= '0'; + srad_loop : for i in 0 to 1000 loop + rs <= pseudorand(64); + shift <= pseudorand(7); + wait for clk_period; + behave_ca_ra := ppc_srad(rs, std_ulogic_vector(resize(unsigned(shift), 64))); + --report "rs = " & to_hstring(rs); + --report "ra = " & to_hstring(ra); + --report "shift = " & to_hstring(shift); + --report "result = " & to_hstring(carry_out & result); + assert behave_ca_ra(63 downto 0) = result and behave_ca_ra(64) = carry_out + report "bad srad expected " & to_hstring(behave_ca_ra) & " got " & to_hstring(carry_out & result); + end loop; + + assert false report "end of test" severity failure; + wait; + end process; +end behave; -- 2.30.2