From d22ac4e7a091e3ad0692473e20e095b1a5ab636a Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 14 May 2020 19:38:48 +0100 Subject: [PATCH] simplify popcount --- src/soc/decoder/isa/caller.py | 2 +- src/soc/logical/main_stage.py | 55 ++++++++++++++--------------------- 2 files changed, 23 insertions(+), 34 deletions(-) diff --git a/src/soc/decoder/isa/caller.py b/src/soc/decoder/isa/caller.py index 2dc6205e..671360cc 100644 --- a/src/soc/decoder/isa/caller.py +++ b/src/soc/decoder/isa/caller.py @@ -310,7 +310,7 @@ class ISACaller: self.namespace[name].eq(output) else: regnum = yield getattr(self.decoder, name) - print('writing reg %d' % regnum) + print('writing reg %d %s' % (regnum, str(output))) if output.bits > 64: output = SelectableInt(output.value, 64) self.gpr[regnum] = output diff --git a/src/soc/logical/main_stage.py b/src/soc/logical/main_stage.py index f671bc72..b4fce292 100644 --- a/src/soc/logical/main_stage.py +++ b/src/soc/logical/main_stage.py @@ -58,44 +58,33 @@ class LogicalMainStage(PipeModBase): ###### popcount ####### with m.Case(InternalOp.OP_POPCNT): - pc2 = array_of(32, 2) - pc4 = array_of(16, 3) - pc8 = array_of(8, 4) - pc16 = array_of(4, 5) - pc32 = array_of(2, 6) - popcnt = Signal(64, reset_less=True) - for i in range(32): - stt, end = i*2, i*2+1 - comb += pc2[i].eq(Cat(a[stt], Const(0, 1)) + - Cat(a[end], Const(0, 1))) - for i in range(16): - stt, end = i*2, i*2+1 - comb += pc4[i].eq(Cat(pc2[stt], Const(0, 1)) + - Cat(pc2[end], Const(0, 1))) - for i in range(8): - stt, end = i*2, i*2+1 - comb += pc8[i].eq(Cat(pc4[stt], Const(0, 1)) + - Cat(pc4[end], Const(0, 1))) - for i in range(4): - stt, end = i*2, i*2+1 - comb += pc16[i].eq(Cat(pc8[stt], Const(0, 1)) + - Cat(pc8[end], Const(0, 1))) - for i in range(2): - stt, end = i*2, i*2+1 - comb += pc32[i].eq(Cat(pc16[stt], Const(0, 1)) + - Cat(pc16[end], Const(0, 1))) + # starting from a, perform successive addition-reductions + pc = [a] + work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)] + for l, b in work: + pc.append(array_of(l, b)) + pc8 = pc[3] # array of 8 8-bit counts (popcntb) + pc32 = pc[5] # array of 2 32-bit counts (popcntw) + popcnt = pc[-1] # array of 1 64-bit count (popcntd) + # cascade-tree of adds + for idx, (l, b) in enumerate(work): + for i in range(l): + stt, end = i*2, i*2+1 + src, dst = pc[idx], pc[idx+1] + comb += dst[i].eq(Cat(src[stt], Const(0, 1)) + + Cat(src[end], Const(0, 1))) + # decode operation length with m.If(self.i.ctx.op.data_len[2:4] == 0b00): - # popcntb + # popcntb - pack 8x 4-bit answers into output for i in range(8): - comb += popcnt[i*8:i*8+4].eq(pc8[i]) + comb += o[i*8:i*8+4].eq(pc8[i]) with m.Elif(self.i.ctx.op.data_len[3] == 0): - # popcntw + # popcntw - pack 2x 5-bit answers into output for i in range(2): - comb += popcnt[i*32:i*32+5].eq(pc32[i]) + comb += o[i*32:i*32+5].eq(pc32[i]) with m.Else(): - comb += popcnt.eq(Cat(pc32[0], Const(0, 1)) + - Cat(pc32[1], Const(0, 1))) - comb += o.eq(popcnt) + # popcntd - put 1x 6-bit answer into output + comb += o.eq(popcnt[0]) ###### parity ####### # TODO with m.Case(InternalOp.OP_PRTY): -- 2.30.2