from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
from nmutil.pipemodbase import PipeModBase
-from soc.fu.logical.pipe_data import ALUInputData
+from nmutil.clz import CLZ
+from soc.fu.logical.pipe_data import LogicalInputData
from soc.fu.alu.pipe_data import ALUOutputData
from ieee754.part.partsig import PartitionedSignal
from soc.decoder.power_enums import InternalOp
-from soc.countzero.countzero import ZeroCounter
from soc.decoder.power_fields import DecodeFields
from soc.decoder.power_fieldsn import SignalBitRange
def array_of(count, bitwidth):
    """Create a list of ``count`` reset-less Signals, each ``bitwidth`` wide.

    Every signal is given an explicit name of the form
    ``pop_<bitwidth>_<index>`` so that the individual entries can be told
    apart (the signals are created inside a loop, so they would otherwise
    not get distinct, meaningful names).

    :param count: number of signals to create
    :param bitwidth: width, in bits, of each signal
    :returns: a plain Python list of ``count`` Signal objects
    """
    return [Signal(bitwidth, reset_less=True, name=f"pop_{bitwidth}_{i}")
            for i in range(count)]
self.fields.create_specs()
def ispec(self):
    """Return the input data specification for this stage.

    The specification is a LogicalInputData built from ``self.pspec``.
    """
    return LogicalInputData(self.pspec)
def ospec(self):
    """Return the output data specification for this stage.

    The specification is an ALUOutputData built from ``self.pspec``.
    """
    return ALUOutputData(self.pspec)  # TODO: ALUIntermediateData
# creating arrays big enough to store the sum, each time
pc = [a]
# QTY32 2-bit (to take 2x 1-bit sums) etc.
- work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 6)]
+ work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 7)]
for l, b in work:
pc.append(array_of(l, b))
pc8 = pc[3] # array of 8 8-bit counts (popcntb)
comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
Cat(src[end], Const(0, 1)))
# decode operation length
- with m.If(op.data_len[2:4] == 0b00):
+ with m.If(op.data_len == 1):
# popcntb - pack 8x 4-bit answers into output
for i in range(8):
- comb += o[i*8:i*8+4].eq(pc8[i])
- with m.Elif(op.data_len[3] == 0):
+ comb += o[i*8:(i+1)*8].eq(pc8[i])
+ with m.Elif(op.data_len == 4):
# popcntw - pack 2x 6-bit answers (0..32 each) into output
for i in range(2):
- comb += o[i*32:i*32+5].eq(pc32[i])
+ comb += o[i*32:(i+1)*32].eq(pc32[i])
with m.Else():
# popcntd - put 1x 7-bit answer (0..64) into output
comb += o.eq(popcnt[0])
###### cntlz / cnttz (count leading / trailing zeros) #######
with m.Case(InternalOp.OP_CNTZ):
XO = self.fields.FormX.XO[0:-1]
- m.submodules.countz = countz = ZeroCounter()
- comb += countz.rs_i.eq(a)
- comb += countz.is_32bit_i.eq(op.is_32bit)
- comb += countz.count_right_i.eq(XO[-1])
- comb += o.eq(countz.result_o)
+ count_right = Signal(reset_less=True)
+ comb += count_right.eq(XO[-1])
+
+ cntz_i = Signal(64, reset_less=True)
+ a32 = Signal(32, reset_less=True)
+ comb += a32.eq(a[0:32])
+
+ with m.If(op.is_32bit):
+ comb += cntz_i.eq(Mux(count_right, a32[::-1], a32))
+ with m.Else():
+ comb += cntz_i.eq(Mux(count_right, a[::-1], a))
+
+ m.submodules.clz = clz = CLZ(64)
+ comb += clz.sig_in.eq(cntz_i)
+ comb += o.eq(Mux(op.is_32bit, clz.lz-32, clz.lz))
###### bpermd #######
# TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
###### sticky overflow and context, both pass-through #####
- comb += self.o.so.eq(self.i.so)
+ comb += self.o.xer_so.data.eq(self.i.xer_so)
comb += self.o.ctx.eq(self.i.ctx)
return m