divider: Do absolute-value ops in divider instead of decode
authorPaul Mackerras <paulus@ozlabs.org>
Fri, 27 Sep 2019 22:55:08 +0000 (08:55 +1000)
committerPaul Mackerras <paulus@ozlabs.org>
Fri, 27 Sep 2019 22:55:08 +0000 (08:55 +1000)
This moves the negation of negative operands for signed divide and
modulus operations out of the decode2 stage and into the divider.
If either of the operands for a signed divide or modulus operation
is negative, the divider now takes an extra cycle to negate the
operands that are negative.

The interface to the divider now has an 'is_signed' signal rather
than a 'neg_result' signal, and the dividend and divisor can be
negative, so divider_tb had to be updated for the new interface.

The reason for doing this is that one of the worst timing violations
on the Arty A7-100 at 100MHz involved the carry chain in the adders
that did the negation of the dividend and divisor in the decode stage.
Moving the negations to a separate cycle fixes that and also seems to
reduce the total number of slice LUTs used.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
common.vhdl
decode2.vhdl
divider.vhdl
divider_tb.vhdl

index bd9b7d0d591465c3f6484c7a481d5b9adae95ad8..d7252f6002bccdf96a15c7332a1ffd0abc688b84 100644 (file)
@@ -81,13 +81,13 @@ package common is
                write_reg: std_ulogic_vector(4 downto 0);
                 dividend: std_ulogic_vector(63 downto 0);
                 divisor: std_ulogic_vector(63 downto 0);
-                neg_result: std_ulogic;
+                is_signed: std_ulogic;
                 is_32bit: std_ulogic;
                 is_extended: std_ulogic;
                 is_modulus: std_ulogic;
                 rc: std_ulogic;
         end record;
-        constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', neg_result => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0'));
+        constant Decode2ToDividerInit: Decode2ToDividerType := (valid => '0', is_signed => '0', is_32bit => '0', is_extended => '0', is_modulus => '0', rc => '0', others => (others => '0'));
 
        type Decode2ToRegisterFileType is record
                read1_enable : std_ulogic;
index deb16dd956e913815f23a778da2dc3841b12651b..2ace765a08f72f2d96ca3e940921386ed3e611fc 100644 (file)
@@ -221,9 +221,6 @@ begin
                variable v_int : reg_internal_type;
                variable mul_a : std_ulogic_vector(63 downto 0);
                variable mul_b : std_ulogic_vector(63 downto 0);
-               variable dividend : std_ulogic_vector(63 downto 0);
-               variable divisor  : std_ulogic_vector(63 downto 0);
-                variable absdend  : std_ulogic_vector(31 downto 0);
                variable decoded_reg_a : decode_input_reg_t;
                variable decoded_reg_b : decode_input_reg_t;
                variable decoded_reg_c : decode_input_reg_t;
@@ -308,60 +305,36 @@ begin
                 --       s = 1 for signed, 0 for unsigned (for div*)
                 --       t = 1 for 32-bit, 0 for 64-bit
                 --       r = RC bit (record condition code)
-                -- For signed division/modulus, we take absolute values and
-                -- tell the divider what the sign of the result should be,
-                -- which is the dividend sign for modulus, and the XOR of
-                -- the dividend and divisor signs for division.
-                dividend := decoded_reg_a.data;
-                divisor := decoded_reg_b.data;
                v.d.write_reg := decode_output_reg(d_in.decode.output_reg_a, d_in.insn);
                 v.d.is_modulus := not d_in.insn(8);
+                v.d.is_32bit := not d_in.insn(2);
                 if d_in.insn(8) = '1' then
-                    signed_division := d_in.insn(6);
+                        signed_division := d_in.insn(6);
                 else
-                    signed_division := d_in.insn(10);
+                        signed_division := d_in.insn(10);
                 end if;
+                v.d.is_signed := signed_division;
                 if d_in.insn(2) = '0' then
-                    -- 64-bit forms
-                        v.d.is_32bit := '0';
+                        -- 64-bit forms
                         if d_in.insn(8) = '1' and d_in.insn(7) = '0' then
                                 v.d.is_extended := '1';
                         end if;
-                        if signed_division = '1' and dividend(63) = '1' then
-                                v.d.neg_result := '1';
-                                v.d.dividend := std_ulogic_vector(- signed(dividend));
-                        else
-                                v.d.dividend := dividend;
-                        end if;
-                        if signed_division = '1' and divisor(63) = '1' then
-                                if d_in.insn(8) = '1' then
-                                        v.d.neg_result := not v.d.neg_result;
-                                end if;
-                                v.d.divisor := std_ulogic_vector(- signed(divisor));
-                        else
-                                v.d.divisor := divisor;
-                        end if;
+                        v.d.dividend := decoded_reg_a.data;
+                        v.d.divisor := decoded_reg_b.data;
                 else
                         -- 32-bit forms
-                        v.d.is_32bit := '1';
-                        if signed_division = '1' and dividend(31) = '1' then
-                                v.d.neg_result := '1';
-                                absdend := std_ulogic_vector(- signed(dividend(31 downto 0)));
-                        else
-                                absdend := dividend(31 downto 0);
-                        end if;
                         if d_in.insn(8) = '1' and d_in.insn(7) = '0' then   -- extended forms
-                                v.d.dividend := absdend & x"00000000";
+                                v.d.dividend := decoded_reg_a.data(31 downto 0) & x"00000000";
+                        elsif signed_division = '1' and decoded_reg_a.data(31) = '1' then
+                                -- sign extend to 64 bits
+                                v.d.dividend := x"ffffffff" & decoded_reg_a.data(31 downto 0);
                         else
-                                v.d.dividend := x"00000000" & absdend;
+                                v.d.dividend := x"00000000" & decoded_reg_a.data(31 downto 0);
                         end if;
-                        if signed_division = '1' and divisor(31) = '1' then
-                                if d_in.insn(8) = '1' then
-                                        v.d.neg_result := not v.d.neg_result;
-                                end if;
-                                v.d.divisor := x"00000000" & std_ulogic_vector(- signed(divisor(31 downto 0)));
+                        if signed_division = '1' and decoded_reg_b.data(31) = '1' then
+                                v.d.divisor := x"ffffffff" & decoded_reg_b.data(31 downto 0);
                         else
-                                v.d.divisor := x"00000000" & divisor(31 downto 0);
+                                v.d.divisor := x"00000000" & decoded_reg_b.data(31 downto 0);
                         end if;
                 end if;
                 v.d.rc := decode_rc(d_in.decode.rc, d_in.insn);
index c1335222edb48bf062adb1604e667bd78e64617f..8345623f3d2c418f7295c7a1ebd6c8ed534e0ffb 100644 (file)
@@ -24,10 +24,12 @@ architecture behaviour of divider is
     signal sresult    : std_ulogic_vector(63 downto 0);
     signal qbit       : std_ulogic;
     signal running    : std_ulogic;
+    signal signcheck  : std_ulogic;
     signal count      : unsigned(6 downto 0);
     signal neg_result : std_ulogic;
     signal is_modulus : std_ulogic;
     signal is_32bit   : std_ulogic;
+    signal extended   : std_ulogic;
     signal rc         : std_ulogic;
     signal write_reg  : std_ulogic_vector(4 downto 0);
 
@@ -64,7 +66,7 @@ begin
                 running <= '0';
                 count <= "0000000";
             elsif d_in.valid = '1' then
-                if d_in.is_extended = '1' then
+                if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
                     dend <= d_in.dividend & x"0000000000000000";
                 else
                     dend <= x"0000000000000000" & d_in.dividend;
@@ -72,12 +74,27 @@ begin
                 div <= unsigned(d_in.divisor);
                 quot <= (others => '0');
                 write_reg <= d_in.write_reg;
-                neg_result <= d_in.neg_result;
+                neg_result <= '0';
                 is_modulus <= d_in.is_modulus;
+                extended <= d_in.is_extended;
                 is_32bit <= d_in.is_32bit;
                 rc <= d_in.rc;
                 count <= "0000000";
                 running <= '1';
+                signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
+            elsif signcheck = '1' then
+                signcheck <= '0';
+                neg_result <= dend(63) xor (div(63) and not is_modulus);
+                if dend(63) = '1' then
+                    if extended = '1' then
+                        dend <= std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
+                    else
+                        dend <= x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
+                    end if;
+                end if;
+                if div(63) = '1' then
+                    div <= unsigned(- signed(div));
+                end if;
             elsif running = '1' then
                 if count = "0111111" then
                     running <= '0';
index 0fa7f0519db8c3293c2a2c04b36f1edb81bf69fe..3d8fc090ec243cc43698a18e1682c69e6d9a0507 100644 (file)
@@ -44,7 +44,7 @@ begin
         d1.write_reg <= "10001";
         d1.dividend <= x"0000000010001000";
         d1.divisor  <= x"0000000000001111";
-        d1.neg_result <= '0';
+        d1.is_signed <= '0';
         d1.is_32bit <= '0';
         d1.is_extended <= '0';
         d1.is_modulus <= '0';
@@ -55,7 +55,7 @@ begin
 
         d1.valid <= '0';
 
-        for j in 0 to 64 loop
+        for j in 0 to 65 loop
             wait for clk_period;
             if d2.valid = '1' then
                 exit;
@@ -79,7 +79,7 @@ begin
 
         d1.valid <= '0';
 
-        for j in 0 to 64 loop
+        for j in 0 to 65 loop
             wait for clk_period;
             if d2.valid = '1' then
                 exit;
@@ -105,27 +105,15 @@ begin
                     ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
                     rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
 
-                    if ra(63) = '1' then
-                        d1.dividend <= std_ulogic_vector(- signed(ra));
-                    else
-                        d1.dividend <= ra;
-                    end if;
-                    if rb(63) = '1' then
-                        d1.divisor <= std_ulogic_vector(- signed(rb));
-                    else
-                        d1.divisor <= rb;
-                    end if;
-                    if ra(63) = rb(63) then
-                        d1.neg_result <= '0';
-                    else
-                        d1.neg_result <= '1';
-                    end if;
+                    d1.dividend <= ra;
+                    d1.divisor <= rb;
+                    d1.is_signed <= '1';
                     d1.valid <= '1';
 
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -154,13 +142,13 @@ begin
 
                     d1.dividend <= ra;
                     d1.divisor <= rb;
-                    d1.neg_result <= '0';
+                    d1.is_signed <= '0';
                     d1.valid <= '1';
 
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -187,28 +175,16 @@ begin
                     ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
                     rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
 
-                    if ra(63) = '1' then
-                        d1.dividend <= std_ulogic_vector(- signed(ra));
-                    else
-                        d1.dividend <= ra;
-                    end if;
-                    if rb(63) = '1' then
-                        d1.divisor <= std_ulogic_vector(- signed(rb));
-                    else
-                        d1.divisor <= rb;
-                    end if;
-                    if ra(63) = rb(63) then
-                        d1.neg_result <= '0';
-                    else
-                        d1.neg_result <= '1';
-                    end if;
+                    d1.dividend <= ra;
+                    d1.divisor <= rb;
+                    d1.is_signed <= '1';
                     d1.is_extended <= '1';
                     d1.valid <= '1';
 
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -216,14 +192,17 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
-                    if unsigned(d1.divisor) > unsigned(d1.dividend) then
+                    if rb /= x"0000000000000000" then
                         d128 := ra & x"0000000000000000";
                         q128 := std_ulogic_vector(signed(d128) / signed(rb));
-                        behave_rt := q128(63 downto 0);
-                        assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
-                            report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
-                        assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divde";
+                        if q128(127 downto 63) = x"0000000000000000" & '0' or
+                            q128(127 downto 63) = x"ffffffffffffffff" & '1' then
+                            behave_rt := q128(63 downto 0);
+                            assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
+                                report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
+                            assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                                report "bad CR setting for divde";
+                        end if;
                     end if;
                 end loop;
             end loop;
@@ -239,14 +218,14 @@ begin
 
                     d1.dividend <= ra;
                     d1.divisor <= rb;
-                    d1.neg_result <= '0';
+                    d1.is_signed <= '0';
                     d1.is_extended <= '1';
                     d1.valid <= '1';
 
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -254,7 +233,7 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
-                    if unsigned(d1.divisor) > unsigned(d1.dividend) then
+                    if unsigned(rb) > unsigned(ra) then
                         d128 := ra & x"0000000000000000";
                         q128 := std_ulogic_vector(unsigned(d128) / unsigned(rb));
                         behave_rt := q128(63 downto 0);
@@ -275,21 +254,9 @@ begin
                     ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
                     rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
 
-                    if ra(63) = '1' then
-                        d1.dividend <= std_ulogic_vector(- signed(ra));
-                    else
-                        d1.dividend <= ra;
-                    end if;
-                    if rb(63) = '1' then
-                        d1.divisor <= std_ulogic_vector(- signed(rb));
-                    else
-                        d1.divisor <= rb;
-                    end if;
-                    if ra(63) = rb(63) then
-                        d1.neg_result <= '0';
-                    else
-                        d1.neg_result <= '1';
-                    end if;
+                    d1.dividend <= ra;
+                    d1.divisor <= rb;
+                    d1.is_signed <= '1';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '1';
                     d1.valid <= '1';
@@ -297,7 +264,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -326,7 +293,7 @@ begin
 
                     d1.dividend <= ra;
                     d1.divisor <= rb;
-                    d1.neg_result <= '0';
+                    d1.is_signed <= '0';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '1';
                     d1.valid <= '1';
@@ -334,7 +301,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -361,21 +328,9 @@ begin
                     ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 32)) & x"00000000";
                     rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
 
-                    if ra(63) = '1' then
-                        d1.dividend <= std_ulogic_vector(- signed(ra));
-                    else
-                        d1.dividend <= ra;
-                    end if;
-                    if rb(63) = '1' then
-                        d1.divisor <= std_ulogic_vector(- signed(rb));
-                    else
-                        d1.divisor <= rb;
-                    end if;
-                    if ra(63) = rb(63) then
-                        d1.neg_result <= '0';
-                    else
-                        d1.neg_result <= '1';
-                    end if;
+                    d1.dividend <= ra;
+                    d1.divisor <= rb;
+                    d1.is_signed <= '1';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '1';
                     d1.valid <= '1';
@@ -383,7 +338,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -391,12 +346,15 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
-                    if unsigned(d1.divisor(31 downto 0)) > unsigned(d1.dividend(63 downto 32)) then
+                    if rb /= x"0000000000000000" then
                         behave_rt := std_ulogic_vector(signed(ra) / signed(rb));
-                        assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
-                            report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
-                        assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divwe";
+                        if behave_rt(63 downto 31) = x"00000000" & '0' or
+                            behave_rt(63 downto 31) = x"ffffffff" & '1' then
+                            assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
+                                report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
+                            assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                                report "bad CR setting for divwe";
+                        end if;
                     end if;
                 end loop;
             end loop;
@@ -412,7 +370,7 @@ begin
 
                     d1.dividend <= ra;
                     d1.divisor <= rb;
-                    d1.neg_result <= '0';
+                    d1.is_signed <= '0';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '1';
                     d1.valid <= '1';
@@ -420,7 +378,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -428,7 +386,7 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
-                    if unsigned(d1.divisor(31 downto 0)) > unsigned(d1.dividend(63 downto 32)) then
+                    if unsigned(rb(31 downto 0)) > unsigned(ra(63 downto 32)) then
                         behave_rt := std_ulogic_vector(unsigned(ra) / unsigned(rb));
                         assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
                             report "bad divweu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
@@ -447,17 +405,9 @@ begin
                     ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
                     rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
 
-                    if ra(63) = '1' then
-                        d1.dividend <= std_ulogic_vector(- signed(ra));
-                    else
-                        d1.dividend <= ra;
-                    end if;
-                    if rb(63) = '1' then
-                        d1.divisor <= std_ulogic_vector(- signed(rb));
-                    else
-                        d1.divisor <= rb;
-                    end if;
-                    d1.neg_result <= ra(63);
+                    d1.dividend <= ra;
+                    d1.divisor <= rb;
+                    d1.is_signed <= '1';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '0';
                     d1.is_modulus <= '1';
@@ -466,7 +416,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -495,7 +445,7 @@ begin
 
                     d1.dividend <= ra;
                     d1.divisor <= rb;
-                    d1.neg_result <= '0';
+                    d1.is_signed <= '0';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '0';
                     d1.is_modulus <= '1';
@@ -504,7 +454,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -531,17 +481,9 @@ begin
                     ra := std_ulogic_vector(resize(signed(pseudorand(dlength * 8)), 64));
                     rb := std_ulogic_vector(resize(signed(pseudorand(vlength * 8)), 64));
 
-                    if ra(63) = '1' then
-                        d1.dividend <= std_ulogic_vector(- signed(ra));
-                    else
-                        d1.dividend <= ra;
-                    end if;
-                    if rb(63) = '1' then
-                        d1.divisor <= std_ulogic_vector(- signed(rb));
-                    else
-                        d1.divisor <= rb;
-                    end if;
-                    d1.neg_result <= ra(63);
+                    d1.dividend <= ra;
+                    d1.divisor <= rb;
+                    d1.is_signed <= '1';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '1';
                     d1.is_modulus <= '1';
@@ -550,7 +492,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -579,7 +521,7 @@ begin
 
                     d1.dividend <= ra;
                     d1.divisor <= rb;
-                    d1.neg_result <= '0';
+                    d1.is_signed <= '0';
                     d1.is_extended <= '0';
                     d1.is_32bit <= '1';
                     d1.is_modulus <= '1';
@@ -588,7 +530,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 64 loop
+                    for j in 0 to 65 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;