simplify popcount
author Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 14 May 2020 18:38:48 +0000 (19:38 +0100)
committer Luke Kenneth Casson Leighton <lkcl@lkcl.net>
Thu, 14 May 2020 18:38:48 +0000 (19:38 +0100)
src/soc/decoder/isa/caller.py
src/soc/logical/main_stage.py

index 2dc6205e004b432eb887b5452ff79bb4f4f28fd7..671360ccfb8b44b08e012b5d6be30e5cefa0a630 100644 (file)
@@ -310,7 +310,7 @@ class ISACaller:
                         self.namespace[name].eq(output)
                 else:
                     regnum = yield getattr(self.decoder, name)
-                    print('writing reg %d' % regnum)
+                    print('writing reg %d %s' % (regnum, str(output)))
                     if output.bits > 64:
                         output = SelectableInt(output.value, 64)
                     self.gpr[regnum] = output
index f671bc729611238600eb77723b5cda1491e4ba78..b4fce292cb6412c161a52749e3f3b9667053c2a8 100644 (file)
@@ -58,44 +58,33 @@ class LogicalMainStage(PipeModBase):
 
             ###### popcount #######
             with m.Case(InternalOp.OP_POPCNT):
-                pc2 = array_of(32, 2)
-                pc4 = array_of(16, 3)
-                pc8 = array_of(8, 4)
-                pc16 = array_of(4, 5)
-                pc32 = array_of(2, 6)
-                popcnt = Signal(64, reset_less=True)
-                for i in range(32):
-                    stt, end = i*2, i*2+1
-                    comb += pc2[i].eq(Cat(a[stt], Const(0, 1)) +
-                                      Cat(a[end], Const(0, 1)))
-                for i in range(16):
-                    stt, end = i*2, i*2+1
-                    comb += pc4[i].eq(Cat(pc2[stt], Const(0, 1)) +
-                                      Cat(pc2[end], Const(0, 1)))
-                for i in range(8):
-                    stt, end = i*2, i*2+1
-                    comb += pc8[i].eq(Cat(pc4[stt], Const(0, 1)) +
-                                      Cat(pc4[end], Const(0, 1)))
-                for i in range(4):
-                    stt, end = i*2, i*2+1
-                    comb += pc16[i].eq(Cat(pc8[stt], Const(0, 1)) +
-                                       Cat(pc8[end], Const(0, 1)))
-                for i in range(2):
-                    stt, end = i*2, i*2+1
-                    comb += pc32[i].eq(Cat(pc16[stt], Const(0, 1)) +
-                                       Cat(pc16[end], Const(0, 1)))
+                # starting from a, perform successive addition-reductions
+                pc = [a]
+                work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
+                for l, b in work:
+                    pc.append(array_of(l, b))
+                pc8 = pc[3]     # array of 8 8-bit counts (popcntb)
+                pc32 = pc[5]    # array of 2 32-bit counts (popcntw)
+                popcnt = pc[-1] # array of 1 64-bit count (popcntd)
+                # cascade-tree of adds
+                for idx, (l, b) in enumerate(work):
+                    for i in range(l):
+                        stt, end = i*2, i*2+1
+                        src, dst = pc[idx], pc[idx+1]
+                        comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
+                                          Cat(src[end], Const(0, 1)))
+                # decode operation length
                 with m.If(self.i.ctx.op.data_len[2:4] == 0b00):
-                    # popcntb
+                    # popcntb - pack 8x 4-bit answers into output
                     for i in range(8):
-                        comb += popcnt[i*8:i*8+4].eq(pc8[i])
+                        comb += o[i*8:i*8+4].eq(pc8[i])
                 with m.Elif(self.i.ctx.op.data_len[3] == 0):
-                    # popcntw
+                    # popcntw - pack 2x 5-bit answers into output
                     for i in range(2):
-                        comb += popcnt[i*32:i*32+5].eq(pc32[i])
+                        comb += o[i*32:i*32+5].eq(pc32[i])
                 with m.Else():
-                    comb += popcnt.eq(Cat(pc32[0], Const(0, 1)) +
-                                      Cat(pc32[1], Const(0, 1)))
-                comb += o.eq(popcnt)
+                    # popcntd - put 1x 6-bit answer into output
+                    comb += o.eq(popcnt[0])
 
             ###### parity #######
             # TODO with m.Case(InternalOp.OP_PRTY):