src/soc/fu/logical/main_stage.py

# This stage does most of the work of executing Logical instructions:
# AND, OR, XOR, POPCNT, PRTY, CMPB and CNTZ (BPERMD is still a TODO here).
# The input and output stages additionally perform bit-negation on the
# input(s) and output, as well as carry and overflow generation.
# This module should therefore not gate the carry or overflow: that is up
# to the output stage.
   7
   8 from nmigen import (Module, Signal, Cat, Repl, Mux, Const, Array)
   9 from nmutil.pipemodbase import PipeModBase
  10 from nmutil.clz import CLZ
  11 from soc.fu.logical.pipe_data import LogicalInputData
  12 from soc.fu.alu.pipe_data import ALUOutputData
  13 from ieee754.part.partsig import PartitionedSignal
  14 from soc.decoder.power_enums import InternalOp
  15
  16 from soc.decoder.power_fields import DecodeFields
  17 from soc.decoder.power_fieldsn import SignalBitRange
  18
  19
  20 def array_of(count, bitwidth):
  21     res = []
  22     for i in range(count):
  23         res.append(Signal(bitwidth, reset_less=True))
  24     return res
  25
  26
  27 class LogicalMainStage(PipeModBase):
  28     def __init__(self, pspec):
  29         super().__init__(pspec, "main")
  30         self.fields = DecodeFields(SignalBitRange, [self.i.ctx.op.insn])
  31         self.fields.create_specs()
  32
  33     def ispec(self):
  34         return LogicalInputData(self.pspec)
  35
  36     def ospec(self):
  37         return ALUOutputData(self.pspec) # TODO: ALUIntermediateData
  38
  39     def elaborate(self, platform):
  40         m = Module()
  41         comb = m.d.comb
  42         op, a, b, o = self.i.ctx.op, self.i.a, self.i.b, self.o.o
  43
  44         ##########################
  45         # main switch for logic ops AND, OR and XOR, cmpb, parity, and popcount
  46
  47         with m.Switch(op.insn_type):
  48
  49             ###### AND, OR, XOR #######
  50             with m.Case(InternalOp.OP_AND):
  51                 comb += o.eq(a & b)
  52             with m.Case(InternalOp.OP_OR):
  53                 comb += o.eq(a | b)
  54             with m.Case(InternalOp.OP_XOR):
  55                 comb += o.eq(a ^ b)
  56
  57             ###### cmpb #######
  58             with m.Case(InternalOp.OP_CMPB):
  59                 l = []
  60                 for i in range(8):
  61                     slc = slice(i*8, (i+1)*8)
  62                     l.append(Repl(a[slc] == b[slc], 8))
  63                 comb += o.eq(Cat(*l))
  64
  65             ###### popcount #######
  66             with m.Case(InternalOp.OP_POPCNT):
  67                 # starting from a, perform successive addition-reductions
  68                 # creating arrays big enough to store the sum, each time
  69                 pc = [a]
  70                 # QTY32 2-bit (to take 2x 1-bit sums) etc.
  71                 work = [(32, 2), (16, 3), (8, 4), (4, 5), (2, 6), (1, 7)]
  72                 for l, b in work:
  73                     pc.append(array_of(l, b))
  74                 pc8 = pc[3]     # array of 8 8-bit counts (popcntb)
  75                 pc32 = pc[5]    # array of 2 32-bit counts (popcntw)
  76                 popcnt = pc[-1] # array of 1 64-bit count (popcntd)
  77                 # cascade-tree of adds
  78                 for idx, (l, b) in enumerate(work):
  79                     for i in range(l):
  80                         stt, end = i*2, i*2+1
  81                         src, dst = pc[idx], pc[idx+1]
  82                         comb += dst[i].eq(Cat(src[stt], Const(0, 1)) +
  83                                           Cat(src[end], Const(0, 1)))
  84                 # decode operation length
  85                 with m.If(op.data_len[2:4] == 0b00):
  86                     # popcntb - pack 8x 4-bit answers into output
  87                     for i in range(8):
  88                         comb += o[i*8:i*8+4].eq(pc8[i])
  89                 with m.Elif(op.data_len[3] == 0):
  90                     # popcntw - pack 2x 5-bit answers into output
  91                     for i in range(2):
  92                         comb += o[i*32:i*32+5].eq(pc32[i])
  93                 with m.Else():
  94                     # popcntd - put 1x 6-bit answer into output
  95                     comb += o.eq(popcnt[0])
  96
  97             ###### parity #######
  98             with m.Case(InternalOp.OP_PRTY):
  99                 # strange instruction which XORs together the LSBs of each byte
 100                 par0 = Signal(reset_less=True)
 101                 par1 = Signal(reset_less=True)
 102                 comb += par0.eq(Cat(a[0] , a[8] , a[16], a[24]).xor())
 103                 comb += par1.eq(Cat(a[32], a[40], a[48], a[56]).xor())
 104                 with m.If(op.data_len[3] == 1):
 105                     comb += o.eq(par0 ^ par1)
 106                 with m.Else():
 107                     comb += o[0].eq(par0)
 108                     comb += o[32].eq(par1)
 109
 110             ###### cntlz #######
 111             with m.Case(InternalOp.OP_CNTZ):
 112                 XO = self.fields.FormX.XO[0:-1]
 113                 count_right = Signal(reset_less=True)
 114                 comb += count_right.eq(XO[-1])
 115
 116                 cntz_i = Signal(64, reset_less=True)
 117                 a32 = Signal(32, reset_less=True)
 118                 comb += a32.eq(a[0:32])
 119
 120                 with m.If(op.is_32bit):
 121                     comb += cntz_i.eq(Mux(count_right, a32[::-1], a32))
 122                 with m.Else():
 123                     comb += cntz_i.eq(Mux(count_right, a[::-1], a))
 124
 125                 m.submodules.clz = clz = CLZ(64)
 126                 comb += clz.sig_in.eq(cntz_i)
 127                 comb += o.eq(Mux(op.is_32bit, clz.lz-32, clz.lz))
 128
 129             ###### bpermd #######
 130             # TODO with m.Case(InternalOp.OP_BPERM): - not in microwatt
 131
 132         ###### sticky overflow and context, both pass-through #####
 133
 134         comb += self.o.xer_so.data.eq(self.i.so)
 135         comb += self.o.ctx.eq(self.i.ctx)
 136
 137         return m