src/ieee754/part_shift/part_shift_scalar.py

   1 # SPDX-License-Identifier: LGPL-2.1-or-later
   2 # See Notices.txt for copyright information
   3
   4 """
   5 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
   6
   7 dynamically partitionable shifter. Only the operand to be shifted can
   8 be partitioned, the amount to shift by *must* be a scalar
   9
  10 See:
  11
  12 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
  13 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
  14 """
  15 from nmigen import Signal, Module, Elaboratable, Cat, Mux
  16 from ieee754.part_mul_add.partpoints import PartitionPoints
  17 from ieee754.part_shift.part_shift_dynamic import ShifterMask
  18 import math
  19
  20
  21 class PartitionedScalarShift(Elaboratable):
  22     def __init__(self, width, partition_points):
  23         self.width = width
  24         self.partition_points = PartitionPoints(partition_points)
  25
  26         self.data = Signal(width)
  27         self.shiftbits = math.ceil(math.log2(width))
  28         self.shifter = Signal(self.shiftbits)
  29         self.output = Signal(width)
  30
  31     def elaborate(self, platform):
  32         m = Module()
  33         comb = m.d.comb
  34         width = self.width
  35         pwid = self.partition_points.get_max_partition_count(width)-1
  36         shiftbits = self.shiftbits
  37         shifted = Signal(self.data.width)
  38         gates = self.partition_points.as_sig()
  39         comb += shifted.eq(self.data << self.shifter)
  40
  41         parts = []
  42         outputs = []
  43         shiftparts = []
  44         intervals = []
  45         keys = list(self.partition_points.keys()) + [self.width]
  46         start = 0
  47         for i in range(len(keys)):
  48             end = keys[i]
  49             parts.append(self.data[start:end])
  50             outputs.append(self.output[start:end])
  51             intervals.append((start,end))
  52             start = end  # for next time round loop
  53
  54         min_bits = math.ceil(math.log2(intervals[0][1] - intervals[0][0]))
  55         shifter_masks = []
  56         for i in range(len(intervals)):
  57             max_bits = math.ceil(math.log2(width-intervals[i][0]))
  58             if pwid-i != 0:
  59                 sm = ShifterMask(pwid-i, shiftbits,
  60                                  max_bits, min_bits)
  61                 setattr(m.submodules, "sm%d" % i, sm)
  62                 comb += sm.gates.eq(gates[i:pwid])
  63                 mask = Signal(shiftbits, name="sm_mask%d" % i)
  64                 comb += mask.eq(sm.mask)
  65                 shifter_masks.append(mask)
  66             else: # having a 0 width signal seems to give the proof issues
  67                 shifter_masks.append((1<<min_bits)-1)
  68         print(m.submodules)
  69
  70         for i, interval in enumerate(intervals):
  71             s,e = interval
  72             sp = Signal(width, name="sp%d" % i)
  73             _shifter = Signal(self.shifter.width, name="shifter%d" % i)
  74             comb += _shifter.eq(self.shifter & shifter_masks[i])
  75             comb += sp[s:].eq(self.data[s:e] << _shifter)
  76             shiftparts.append(sp)
  77
  78
  79         for i, interval in enumerate(intervals):
  80             start, end = interval
  81             if i == 0:
  82                 intermed = shiftparts[i]
  83             else:
  84                 intermed = shiftparts[i] | Mux(gates[i-1], 0, prev)
  85             comb += outputs[i].eq(intermed[start:end])
  86             prev = intermed
  87
  88         return m