divider: Return 0 for invalid and overflow cases, like P9 does
author: Paul Mackerras <paulus@ozlabs.org>
Fri, 11 Oct 2019 04:16:47 +0000 (15:16 +1100)
committer: Paul Mackerras <paulus@ozlabs.org>
Sat, 12 Oct 2019 05:47:33 +0000 (16:47 +1100)
This adds logic to detect the cases where the quotient of the
division overflows the range of the output representation, and
return all zeroes in those cases, which is what POWER9 does.
To do this, we extend the dividend register by 1 bit and we do
an extra step in the division process to get a 2^64 bit of the
quotient, which ends up in the 'overflow' signal.  This catches all
the cases where dividend >= 2^64 * divisor, including the case
where divisor = 0, and the divde/divdeu cases where |RA| >= |RB|.

Then, in the output stage, we also check that the result fits in
the representable range, which depends on whether the division is
a signed division or not, and whether it is a 32-bit or 64-bit
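division.  If the quotient is >= 2^64 (i.e. dividend >= 2^64 * divisor,
as flagged by the 'overflow' signal) or the result doesn't fit in the
representable range, write_reg_data is set to 0 and, when RC=1,
write_cr_data is set to 0x20000000 (i.e. cr0.eq = 1).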

POWER9 sets the top 32 bits of the result to zero for 32-bit signed
divisions, and sets CR0 when RC=1 according to the 64-bit value
(i.e. CR0.LT is always 0 for 32-bit signed divisions, even if the
32-bit result is negative).  However, modsw with a negative result
sets the top 32 bits to all 1s.  We follow suit.

This updates divider_tb to check the invalid cases as well as the
valid cases.

This also fixes a small bug where the reset signal for the divider
was driven from rst when it should have been driven from core_rst.

Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
core.vhdl
divider.vhdl
divider_tb.vhdl

index 43b338df61832551901a59537ef413f4f726ed52..aa5e87a459b3947bfe48aa4383ab4dbdaf8c401f 100644 (file)
--- a/core.vhdl
+++ b/core.vhdl
@@ -241,7 +241,7 @@ begin
     divider_0: entity work.divider
         port map (
             clk => clk,
-            rst => rst,
+            rst => core_rst,
             d_in => decode2_to_divider,
             d_out => divider_to_writeback
             );
index 8345623f3d2c418f7295c7a1ebd6c8ed534e0ffb..a2a35b057a6a6f2e1902cc9b4865cba07f7e9431 100644 (file)
@@ -17,7 +17,7 @@ entity divider is
 end entity divider;
 
 architecture behaviour of divider is
-    signal dend       : std_ulogic_vector(127 downto 0);
+    signal dend       : std_ulogic_vector(128 downto 0);
     signal div        : unsigned(63 downto 0);
     signal quot       : std_ulogic_vector(63 downto 0);
     signal result     : std_ulogic_vector(63 downto 0);
@@ -30,30 +30,11 @@ architecture behaviour of divider is
     signal is_modulus : std_ulogic;
     signal is_32bit   : std_ulogic;
     signal extended   : std_ulogic;
+    signal is_signed  : std_ulogic;
     signal rc         : std_ulogic;
     signal write_reg  : std_ulogic_vector(4 downto 0);
-
-    function compare_zero(value : std_ulogic_vector(63 downto 0); is_32 : std_ulogic)
-        return std_ulogic_vector is
-    begin
-        if is_32 = '1' then
-            if value(31) = '1' then
-                return "1000";
-            elsif unsigned(value(30 downto 0)) > 0 then
-                return "0100";
-            else
-                return "0010";
-            end if;
-        else
-            if value(63) = '1' then
-                return "1000";
-            elsif unsigned(value(62 downto 0)) > 0 then
-                return "0100";
-            else
-                return "0010";
-            end if;
-        end if;
-    end function compare_zero;
+    signal overflow   : std_ulogic;
+    signal did_ovf    : std_ulogic;
 
 begin
     divider_0: process(clk)
@@ -67,9 +48,9 @@ begin
                 count <= "0000000";
             elsif d_in.valid = '1' then
                 if d_in.is_extended = '1' and not (d_in.is_signed = '1' and d_in.dividend(63) = '1') then
-                    dend <= d_in.dividend & x"0000000000000000";
+                    dend <= '0' & d_in.dividend & x"0000000000000000";
                 else
-                    dend <= x"0000000000000000" & d_in.dividend;
+                    dend <= '0' & x"0000000000000000" & d_in.dividend;
                 end if;
                 div <= unsigned(d_in.divisor);
                 quot <= (others => '0');
@@ -78,18 +59,20 @@ begin
                 is_modulus <= d_in.is_modulus;
                 extended <= d_in.is_extended;
                 is_32bit <= d_in.is_32bit;
+                is_signed <= d_in.is_signed;
                 rc <= d_in.rc;
-                count <= "0000000";
+                count <= "1111111";
                 running <= '1';
+                overflow <= '0';
                 signcheck <= d_in.is_signed and (d_in.dividend(63) or d_in.divisor(63));
             elsif signcheck = '1' then
                 signcheck <= '0';
                 neg_result <= dend(63) xor (div(63) and not is_modulus);
                 if dend(63) = '1' then
                     if extended = '1' then
-                        dend <= std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
+                        dend <= '0' & std_ulogic_vector(- signed(dend(63 downto 0))) & x"0000000000000000";
                     else
-                        dend <= x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
+                        dend <= '0' & x"0000000000000000" & std_ulogic_vector(- signed(dend(63 downto 0)));
                     end if;
                 end if;
                 if div(63) = '1' then
@@ -99,18 +82,19 @@ begin
                 if count = "0111111" then
                     running <= '0';
                 end if;
-                if dend(127) = '1' or unsigned(dend(126 downto 63)) >= div then
-                    dend <= std_ulogic_vector(unsigned(dend(126 downto 63)) - div) &
-                            dend(62 downto 0) & '0';
+                overflow <= quot(63);
+                if dend(128) = '1' or unsigned(dend(127 downto 64)) >= div then
+                    dend <= std_ulogic_vector(unsigned(dend(127 downto 64)) - div) &
+                            dend(63 downto 0) & '0';
                     quot <= quot(62 downto 0) & '1';
                     count <= count + 1;
-                elsif dend(127 downto 56) = x"000000000000000000" and count(5 downto 3) /= "111" then
+                elsif dend(128 downto 57) = x"000000000000000000" and count(6 downto 3) /= "0111" then
                     -- consume 8 bits of zeroes in one cycle
-                    dend <= dend(119 downto 0) & x"00";
+                    dend <= dend(120 downto 0) & x"00";
                     quot <= quot(55 downto 0) & x"00";
                     count <= count + 8;
                 else
-                    dend <= dend(126 downto 0) & '0';
+                    dend <= dend(127 downto 0) & '0';
                     quot <= quot(62 downto 0) & '0';
                     count <= count + 1;
                 end if;
@@ -126,7 +110,7 @@ begin
         d_out.write_reg_nr <= write_reg;
 
         if is_modulus = '1' then
-            result <= dend(127 downto 64);
+            result <= dend(128 downto 65);
         else
             result <= quot;
         end if;
@@ -135,15 +119,40 @@ begin
         else
             sresult <= result;
         end if;
-        d_out.write_reg_data <= sresult;
+        did_ovf <= '0';
+        if is_32bit = '0' then
+            did_ovf <= overflow or (is_signed and (sresult(63) xor neg_result));
+        elsif is_signed = '1' then
+            if overflow = '1' or
+                (sresult(63 downto 31) /= x"00000000" & '0' and
+                 sresult(63 downto 31) /= x"ffffffff" & '1') then
+                did_ovf <= '1';
+            end if;
+        else
+            did_ovf <= overflow or (or (sresult(63 downto 32)));
+        end if;
+        if did_ovf = '1' then
+            d_out.write_reg_data <= (others => '0');
+        elsif (is_32bit = '1') and (is_modulus = '0') then
+            -- 32-bit divisions set the top 32 bits of the result to 0
+            d_out.write_reg_data <= x"00000000" & sresult(31 downto 0);
+        else
+            d_out.write_reg_data <= sresult;
+        end if;
 
-        if count(6) = '1' then
+        if count = "1000000" then
             d_out.valid <= '1';
             d_out.write_reg_enable <= '1';
             if rc = '1' then
                 d_out.write_cr_enable <= '1';
                 d_out.write_cr_mask <= num_to_fxm(0);
-                d_out.write_cr_data <= compare_zero(sresult, is_32bit) & x"0000000";
+                if (did_ovf = '1') or (or (sresult) = '0') then
+                    d_out.write_cr_data <= x"20000000";
+                elsif (sresult(63) = '1') and not ((is_32bit = '1') and (is_modulus = '0')) then
+                    d_out.write_cr_data <= x"80000000";
+                else
+                    d_out.write_cr_data <= x"40000000";
+                end if;
             end if;
         end if;
     end process;
index 3d8fc090ec243cc43698a18e1682c69e6d9a0507..fdc8da54c11bf4ff9d64712fbe38d7b7ffe2c429 100644 (file)
@@ -35,6 +35,8 @@ begin
         variable si: std_ulogic_vector(15 downto 0);
         variable d128: std_ulogic_vector(127 downto 0);
         variable q128: std_ulogic_vector(127 downto 0);
+        variable q64: std_ulogic_vector(63 downto 0);
+        variable rem32: std_ulogic_vector(31 downto 0);
     begin
         rst <= '1';
         wait for clk_period;
@@ -55,7 +57,7 @@ begin
 
         d1.valid <= '0';
 
-        for j in 0 to 65 loop
+        for j in 0 to 66 loop
             wait for clk_period;
             if d2.valid = '1' then
                 exit;
@@ -79,7 +81,7 @@ begin
 
         d1.valid <= '0';
 
-        for j in 0 to 65 loop
+        for j in 0 to 66 loop
             wait for clk_period;
             if d2.valid = '1' then
                 exit;
@@ -113,7 +115,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -121,13 +123,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
-                    if rb /= x"0000000000000000" then
+                    behave_rt := (others => '0');
+                    if rb /= x"0000000000000000" and (ra /= x"8000000000000000" or rb /= x"ffffffffffffffff") then
                         behave_rt := ppc_divd(ra, rb);
-                        assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
-                            report "bad divd expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divd";
                     end if;
+                    assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
+                        report "bad divd expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for divd";
                 end loop;
             end loop;
         end loop;
@@ -148,7 +151,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -156,13 +159,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
                         behave_rt := ppc_divdu(ra, rb);
-                        assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
-                            report "bad divdu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divdu";
                     end if;
+                    assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
+                        report "bad divdu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for divdu";
                 end loop;
             end loop;
         end loop;
@@ -184,7 +188,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -192,18 +196,19 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
                         d128 := ra & x"0000000000000000";
                         q128 := std_ulogic_vector(signed(d128) / signed(rb));
                         if q128(127 downto 63) = x"0000000000000000" & '0' or
                             q128(127 downto 63) = x"ffffffffffffffff" & '1' then
                             behave_rt := q128(63 downto 0);
-                            assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
-                                report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
-                            assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                                report "bad CR setting for divde";
                         end if;
                     end if;
+                    assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
+                        report "bad divde expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for divde";
                 end loop;
             end loop;
         end loop;
@@ -225,7 +230,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -233,15 +238,16 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if unsigned(rb) > unsigned(ra) then
                         d128 := ra & x"0000000000000000";
                         q128 := std_ulogic_vector(unsigned(d128) / unsigned(rb));
                         behave_rt := q128(63 downto 0);
-                        assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
-                            report "bad divdeu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
-                        assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divdeu";
                     end if;
+                    assert to_hstring(behave_rt) = to_hstring(d2.write_reg_data)
+                        report "bad divdeu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for divdeu";
                 end loop;
             end loop;
         end loop;
@@ -264,7 +270,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -272,13 +278,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
-                    if rb /= x"0000000000000000" then
+                    behave_rt := (others => '0');
+                    if rb /= x"0000000000000000" and (ra /= x"ffffffff80000000" or rb /= x"ffffffffffffffff") then
                         behave_rt := ppc_divw(ra, rb);
-                        assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
-                            report "bad divw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divw";
                     end if;
+                    assert behave_rt = d2.write_reg_data
+                        report "bad divw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for divw";
                 end loop;
             end loop;
         end loop;
@@ -301,7 +308,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -309,13 +316,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
                         behave_rt := ppc_divwu(ra, rb);
-                        assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
-                            report "bad divwu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divwu";
                     end if;
+                    assert behave_rt = d2.write_reg_data
+                        report "bad divwu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for divwu";
                 end loop;
             end loop;
         end loop;
@@ -338,7 +346,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -346,15 +354,17 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
-                        behave_rt := std_ulogic_vector(signed(ra) / signed(rb));
-                        if behave_rt(63 downto 31) = x"00000000" & '0' or
-                            behave_rt(63 downto 31) = x"ffffffff" & '1' then
-                            assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
-                                report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
-                            assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                                report "bad CR setting for divwe";
+                        q64 := std_ulogic_vector(signed(ra) / signed(rb));
+                        if q64(63 downto 31) = x"00000000" & '0' or
+                            q64(63 downto 31) = x"ffffffff" & '1' then
+                            behave_rt := x"00000000" & q64(31 downto 0);
                         end if;
+                        assert behave_rt = d2.write_reg_data
+                            report "bad divwe expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
+                        assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                            report "bad CR setting for divwe";
                     end if;
                 end loop;
             end loop;
@@ -378,7 +388,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -386,13 +396,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if unsigned(rb(31 downto 0)) > unsigned(ra(63 downto 32)) then
                         behave_rt := std_ulogic_vector(unsigned(ra) / unsigned(rb));
-                        assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
-                            report "bad divweu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
-                        assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for divweu";
                     end if;
+                    assert behave_rt = d2.write_reg_data
+                        report "bad divweu expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data) & " for ra = " & to_hstring(ra) & " rb = " & to_hstring(rb);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for divweu";
                 end loop;
             end loop;
         end loop;
@@ -416,7 +427,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -424,13 +435,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
                         behave_rt := std_ulogic_vector(signed(ra) rem signed(rb));
-                        assert behave_rt = d2.write_reg_data
-                            report "bad modsd expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for modsd";
                     end if;
+                    assert behave_rt = d2.write_reg_data
+                        report "bad modsd expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for modsd";
                 end loop;
             end loop;
         end loop;
@@ -454,7 +466,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -462,13 +474,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
                         behave_rt := std_ulogic_vector(unsigned(ra) rem unsigned(rb));
-                        assert behave_rt = d2.write_reg_data
-                            report "bad modud expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for modud";
                     end if;
+                    assert behave_rt = d2.write_reg_data
+                        report "bad modud expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for modud";
                 end loop;
             end loop;
         end loop;
@@ -492,7 +505,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -500,13 +513,19 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
-                        behave_rt := x"00000000" & std_ulogic_vector(signed(ra(31 downto 0)) rem signed(rb(31 downto 0)));
-                        assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
-                            report "bad modsw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for modsw";
+                        rem32 := std_ulogic_vector(signed(ra(31 downto 0)) rem signed(rb(31 downto 0)));
+                        if rem32(31) = '0' then
+                            behave_rt := x"00000000" & rem32;
+                        else
+                            behave_rt := x"ffffffff" & rem32;
+                        end if;
                     end if;
+                    assert behave_rt = d2.write_reg_data
+                        report "bad modsw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for modsw";
                 end loop;
             end loop;
         end loop;
@@ -530,7 +549,7 @@ begin
                     wait for clk_period;
 
                     d1.valid <= '0';
-                    for j in 0 to 65 loop
+                    for j in 0 to 66 loop
                         wait for clk_period;
                         if d2.valid = '1' then
                             exit;
@@ -538,13 +557,14 @@ begin
                     end loop;
                     assert d2.valid = '1';
 
+                    behave_rt := (others => '0');
                     if rb /= x"0000000000000000" then
                         behave_rt := x"00000000" & std_ulogic_vector(unsigned(ra(31 downto 0)) rem unsigned(rb(31 downto 0)));
-                        assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
-                            report "bad moduw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
-                        assert ppc_cmpi('0', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
-                            report "bad CR setting for moduw";
                     end if;
+                    assert behave_rt(31 downto 0) = d2.write_reg_data(31 downto 0)
+                        report "bad moduw expected " & to_hstring(behave_rt) & " got " & to_hstring(d2.write_reg_data);
+                    assert ppc_cmpi('1', behave_rt, x"0000") & x"0000000" = d2.write_cr_data
+                        report "bad CR setting for moduw";
                 end loop;
             end loop;
         end loop;