From: Paul Mackerras Date: Fri, 13 Dec 2019 04:48:54 +0000 (+1100) Subject: execute: Do comparisons using the main adder X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=d2ca625b3b9c98de607b2a56f8428c70ab343891;p=microwatt.git execute: Do comparisons using the main adder This handles OP_CMP like a subtraction; the main adder computes ~RA + RB + 1, and the condition codes are computed from the results. A direct comparison of the two input operands is used to calculate the EQ bit of the condition result. The LT and GT bits are computed from the MSB of the subtraction result, the carry out from the subtraction, and the MSBs of the operands. For a 32-bit comparison, the 32-bit carry and bit 31 of the result and input operands are used; for a 64-bit comparison, the 64-bit carry and bit 63 of the operands and result are used. It turns out to be more convenient to use the 'signed' field of the decode table to distinguish signed from unsigned comparisons, rather than the insn_type. Therefore this uses OP_CMP for both cmp and cmpl, which also has the benefit of reducing the number of values in insn_type_t. Doing this saves over 200 slice LUTs on the Arty A7-100 and improves timing slightly as well. Signed-off-by: Paul Mackerras --- diff --git a/decode1.vhdl b/decode1.vhdl index 6ac3f01..0e42d1b 100644 --- a/decode1.vhdl +++ b/decode1.vhdl @@ -44,8 +44,8 @@ architecture behaviour of decode1 is 29 => (ALU, OP_AND, NONE, CONST_UI_HI, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', ONE, '0', '0'), -- andis. 18 => (ALU, OP_B, NONE, CONST_LI, NONE, NONE, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- b 16 => (ALU, OP_BC, SPR, CONST_BD, NONE, SPR , '1', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '1', '0'), -- bc - 11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpi - 10 => (ALU, OP_CMPL, RA, CONST_UI, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli + 11 => (ALU, OP_CMP, RA, CONST_SI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmpi + 10 => (ALU, OP_CMP, RA, CONST_UI, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpli 34 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '0', '0', '0', '0', NONE, '0', '1'), -- lbz 35 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is1B, '0', '0', '1', '0', '0', '0', NONE, '0', '1'), -- lbzu 42 => (LDST, OP_LOAD, RA_OR_ZERO, CONST_SI, NONE, RT, '0', '0', '0', '0', ZERO, '0', is2B, '0', '1', '0', '0', '0', '0', NONE, '0', '1'), -- lha @@ -145,10 +145,10 @@ architecture behaviour of decode1 is 2#0000011100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- and 2#0000111100# => (ALU, OP_AND, NONE, RB, RS, RA, '0', '0', '1', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- andc -- 2#0011111100# bperm - 2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmp + 2#0000000000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '1', NONE, '0', '0'), -- cmp 2#0111111100# => (ALU, OP_CMPB, NONE, RB, RS, RA, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpb -- 2#0011100000# cmpeqb - 2#0000100000# => (ALU, OP_CMPL, RA, RB, NONE, NONE, '0', '1', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl + 2#0000100000# => (ALU, OP_CMP, RA, RB, NONE, NONE, '0', '1', '1', '0', ONE, '0', NONE, '0', '0', '0', '0', '0', '0', NONE, '0', '0'), -- cmpl -- 2#0011000000# cmprb 2#0000111010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '0', '0', RC, '0', '0'), -- cntlzd 2#0000011010# => (ALU, OP_CNTZ, NONE, NONE, RS, RA, '0', '0', '0', '0', ZERO, '0', NONE, '0', '0', '0', '0', '1', '0', RC, '0', '0'), -- cntlzw diff --git a/decode_types.vhdl b/decode_types.vhdl index fdc1e6e..82039bd 100644 --- a/decode_types.vhdl +++ b/decode_types.vhdl @@ -4,7 +4,7 @@ use ieee.std_logic_1164.all; package decode_types is type insn_type_t is (OP_ILLEGAL, OP_NOP, OP_ADD, OP_ADDPCIS, OP_AND, OP_ATTN, OP_B, OP_BC, OP_BCREG, - OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPL, OP_CMPRB, + OP_BPERM, OP_CMP, OP_CMPB, OP_CMPEQB, OP_CMPRB, OP_CNTZ, OP_CRAND, OP_CRANDC, OP_CREQV, OP_CRNAND, OP_CRNOR, OP_CROR, OP_CRORC, OP_CRXOR, OP_DARN, OP_DCBF, OP_DCBST, OP_DCBT, OP_DCBTST, diff --git a/execute1.vhdl b/execute1.vhdl index 1991009..6889a6a 100644 --- a/execute1.vhdl +++ b/execute1.vhdl @@ -193,6 +193,9 @@ begin variable abs1, abs2 : signed(63 downto 0); variable overflow : std_ulogic; variable negative : std_ulogic; + variable zerohi, zerolo : std_ulogic; + variable msb_a, msb_b : std_ulogic; + variable a_lt : std_ulogic; begin result := (others => '0'); result_with_carry := (others => '0'); @@ -348,7 +351,7 @@ begin report "illegal"; when OP_NOP => -- Do nothing - when OP_ADD => + when OP_ADD | OP_CMP => if e_in.invert_a = '0' then a_inv := e_in.read_data1; else @@ -359,15 +362,57 @@ begin result := result_with_carry(63 downto 0); carry_32 := result(32) xor a_inv(32) xor e_in.read_data2(32); carry_64 := result_with_carry(64); - if e_in.output_carry = '1' then - set_carry(v.e, carry_32, carry_64); - end if; - if e_in.oe = '1' then - set_ov(v.e, - calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)), - calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31))); - end if; - result_en := '1'; + if e_in.insn_type = OP_ADD then + if e_in.output_carry = '1' then + set_carry(v.e, carry_32, carry_64); + end if; + if e_in.oe = '1' then + set_ov(v.e, + calc_ov(a_inv(63), e_in.read_data2(63), carry_64, result_with_carry(63)), + calc_ov(a_inv(31), e_in.read_data2(31), carry_32, result_with_carry(31))); + end if; + result_en := '1'; + else + -- CMP and CMPL instructions + -- Note, we have done RB - RA, not RA - RB + bf := insn_bf(e_in.insn); + l := insn_l(e_in.insn); + v.e.write_cr_enable := '1'; + crnum := to_integer(unsigned(bf)); + v.e.write_cr_mask := num_to_fxm(crnum); + zerolo := not (or (e_in.read_data1(31 downto 0) xor e_in.read_data2(31 downto 0))); + zerohi := not (or (e_in.read_data1(63 downto 32) xor e_in.read_data2(63 downto 32))); + if zerolo = '1' and (l = '0' or zerohi = '1') then + -- values are equal + newcrf := "001" & v.e.xerc.so; + else + if l = '1' then + -- 64-bit comparison + msb_a := e_in.read_data1(63); + msb_b := e_in.read_data2(63); + else + -- 32-bit comparison + msb_a := e_in.read_data1(31); + msb_b := e_in.read_data2(31); + end if; + if msb_a /= msb_b then + -- Subtraction might overflow, but + -- comparison is clear from MSB difference. + -- for signed, 0 is greater; for unsigned, 1 is greater + a_lt := msb_a xnor e_in.is_signed; + else + -- Subtraction cannot overflow since MSBs are equal. + -- carry = 1 indicates RA is smaller (signed or unsigned) + a_lt := (not l and carry_32) or (l and carry_64); + end if; + newcrf := a_lt & not a_lt & '0' & v.e.xerc.so; + end if; + for i in 0 to 7 loop + lo := i*4; + hi := lo + 3; + v.e.write_cr_data(hi downto lo) := newcrf; + end loop; + end if; when OP_AND | OP_OR | OP_XOR => result := logical_result; result_en := '1'; @@ -412,28 +457,6 @@ begin when OP_CMPB => result := ppc_cmpb(e_in.read_data3, e_in.read_data2); result_en := '1'; - when OP_CMP => - bf := insn_bf(e_in.insn); - l := insn_l(e_in.insn); - v.e.write_cr_enable := '1'; - crnum := to_integer(unsigned(bf)); - v.e.write_cr_mask := num_to_fxm(crnum); - for i in 0 to 7 loop - lo := i*4; - hi := lo + 3; - v.e.write_cr_data(hi downto lo) := ppc_cmp(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so); - end loop; - when OP_CMPL => - bf := insn_bf(e_in.insn); - l := insn_l(e_in.insn); - v.e.write_cr_enable := '1'; - crnum := to_integer(unsigned(bf)); - v.e.write_cr_mask := num_to_fxm(crnum); - for i in 0 to 7 loop - lo := i*4; - hi := lo + 3; - v.e.write_cr_data(hi downto lo) := ppc_cmpl(l, e_in.read_data1, e_in.read_data2, v.e.xerc.so); - end loop; when OP_CNTZ => result := countzero_result; result_en := '1';