is_32bit = Signal(reset_less=True)
         comb += is_32bit.eq(self.i.ctx.op.is_32bit)
+        sign_bit = Signal(reset_less=True)
+        comb += sign_bit.eq(Mux(is_32bit, self.i.a[31], self.i.a[63]))
 
         add_output = Signal(self.i.a.width + 1, reset_less=True)
         comb += add_output.eq(self.i.a + self.i.b + self.i.carry_in)
                         comb += mask.eq(0)
                     with m.Else():
                         comb += mask.eq(maskgen.o)
-                comb += self.o.o.eq(rotl_out & mask)
+                with m.If(self.i.ctx.op.is_signed):
+                    comb += self.o.o.eq(rotl_out & mask |
+                                        Mux(sign_bit, ~mask, 0))
+                    comb += self.o.carry_out.eq(sign_bit & ((rotl_out & mask) != 0))
+                with m.Else():
+                    comb += self.o.o.eq(rotl_out & mask)
 
         ###### sticky overflow and context, both pass-through #####
 
 
         comb = m.d.comb
 
         o = Signal.like(self.i.o)
-        o2 = Signal.like(self.i.o)
         with m.If(self.i.ctx.op.invert_out):
-            comb += o2.eq(~self.i.o)
+            comb += o.eq(~self.i.o)
         with m.Else():
-            comb += o2.eq(self.i.o)
+            comb += o.eq(self.i.o)
+        
 
-        with m.If(self.i.ctx.op.is_32bit):
-            comb += o.eq(Cat(o2[0:32], Repl(0, 32)))
-        with m.Else():
-            comb += o.eq(o2)
 
         is_zero = Signal(reset_less=True)
         is_positive = Signal(reset_less=True)
 
                 sim = self.run_tst_program(program, initial_regs)
 
     def test_shift(self):
-        insns = ["slw", "sld", "srw", "srd"]
+        insns = ["slw", "sld", "srw", "srd", "sraw", "srad"]
         for i in range(20):
             choice = random.choice(insns)
             lst = [f"{choice} 3, 1, 2"]
             with Program(lst) as program:
                 sim = self.run_tst_program(program, initial_regs)
 
+    def test_shift_arith(self):
+        """Run a single ``sraw 3, 1, 2`` instruction on random register inputs.
+
+        Register 1 is loaded with a random value in [0, 2**64 - 1] and
+        register 2 with a random value in [0, 63]; all other registers
+        start at zero.  The program is handed to ``self.run_tst_program``.
+
+        NOTE(review): no assertion is made in this method itself;
+        presumably ``run_tst_program`` performs the checking -- confirm.
+        """
+        lst = ["sraw 3, 1, 2"]
+        # 32 registers, all zero except the two operands set below.
+        initial_regs = [0] * 32
+        initial_regs[1] = random.randint(0, (1<<64)-1)
+        initial_regs[2] = random.randint(0, 63)
+        # Print the random operands so a failing run can be reproduced by hand.
+        print(initial_regs[1], initial_regs[2])
+        with Program(lst) as program:
+            # `sim` is not used further in this method.
+            sim = self.run_tst_program(program, initial_regs)
+
     @unittest.skip("broken")
     def test_ilang(self):
         rec = CompALUOpSubset()