src/ieee754/part_shift/part_shift_dynamic.py

   1 # SPDX-License-Identifier: LGPL-2.1-or-later
   2 # See Notices.txt for copyright information
   3
   4 """
   5 Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   6 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
   7
   8 dynamically partitionable shifter. Unlike part_shift_scalar, both
   9 operands can be partitioned
  10
  11 See:
  12
  13 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
  14 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
  15 """
  16 from nmigen import Signal, Module, Elaboratable, Cat, Mux, C
  17 from ieee754.part_mul_add.partpoints import PartitionPoints
  18 import math
  19
  20
  21 class PartitionedDynamicShift(Elaboratable):
  22     def __init__(self, width, partition_points):
  23         self.width = width
  24         self.partition_points = PartitionPoints(partition_points)
  25
  26         self.a = Signal(width, reset_less=True)
  27         self.b = Signal(width, reset_less=True)
  28         self.output = Signal(width, reset_less=True)
  29
  30     def elaborate(self, platform):
  31         m = Module()
  32         comb = m.d.comb
  33         width = self.width
  34         pwid = self.partition_points.get_max_partition_count(width)-1
  35         gates = Signal(pwid, reset_less=True)
  36         comb += gates.eq(self.partition_points.as_sig())
  37
  38         matrix = []
  39         keys = list(self.partition_points.keys()) + [self.width]
  40         start = 0
  41
  42         # break out both the input and output into partition-stratified blocks
  43         a_intervals = []
  44         b_intervals = []
  45         intervals = []
  46         widths = []
  47         start = 0
  48         for i in range(len(keys)):
  49             end = keys[i]
  50             widths.append(width - start)
  51             a_intervals.append(self.a[start:end])
  52             b_intervals.append(self.b[start:end])
  53             intervals.append([start,end])
  54             start = end
  55
  56         min_bits = math.ceil(math.log2(intervals[0][1] - intervals[0][0]))
  57         max_bits = math.ceil(math.log2(width))
  58
  59         # shifts are normally done as (e.g. for 32 bit) result = a & (b&0b11111)
  60         # truncating the b input.  however here of course the size of the
  61         # partition varies dynamically.
  62         shifter_masks = []
  63         for i in range(len(b_intervals)):
  64             mask = Signal(b_intervals[i].shape(), name="shift_mask%d" % i,
  65                           reset_less=True)
  66             bits = Signal(gates.width-i+1, name="bits%d" % i, reset_less=True)
  67             bl = []
  68             for j in range(i, gates.width):
  69                 if bl:
  70                     bl.append(~gates[j] & bits[j-i-1])
  71                 else:
  72                     bl.append(~gates[j])
  73             comb += bits.eq(Cat(*bl))
  74             comb += mask.eq(Cat((1 << min_bits)-1, bits)
  75                             & ((1 << max_bits)-1))
  76             shifter_masks.append(mask)
  77
  78         print(shifter_masks)
  79
  80         # Instead of generating the matrix described in the wiki, I
  81         # instead calculate the shift amounts for each partition, then
  82         # calculate the partial results of each partition << shift
  83         # amount. On the wiki, the following table is given for output #3:
  84         # p2p1p0 | o3
  85         # 0 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b0[7:0]
  86         # 0 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b1[7:0]
  87         # 0 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b2[7:0]
  88         # 0 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b2[7:0]
  89         # 1 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b3[7:0]
  90         # 1 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b3[7:0]
  91         # 1 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b3[7:0]
  92         # 1 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b3[7:0]
  93
  94         # Each output for o3 is given by a3bx and the partial results
  95         # for o2 (namely, a2bx, a1bx, and a0b0). If I calculate the
  96         # partial results [a0b0, a1bx, a2bx, a3bx], I can use just
  97         # those partial results to calculate a0, a1, a2, and a3
  98         shiftbits = math.ceil(math.log2(width))
  99         element = b_intervals[0] & shifter_masks[0]
 100         partial_results = []
 101         partial_results.append(a_intervals[0] << element)
 102         for i in range(1, len(keys)):
 103             s, e = intervals[i]
 104             masked = Signal(b_intervals[i].shape(), name="masked%d" % i,
 105                           reset_less=True)
 106             comb += masked.eq(b_intervals[i] & shifter_masks[i])
 107             element = Mux(gates[i-1], masked, element)
 108             elmux = Signal(b_intervals[i].shape(), name="elmux%d" % i,
 109                           reset_less=True)
 110             comb += elmux.eq(element)
 111             element = elmux
 112
 113             # This calculates which partition of b to select the
 114             # shifter from. According to the table above, the
 115             # partition to select is given by the highest set bit in
 116             # the partition mask, this calculates that with a mux
 117             # chain
 118
 119             # This computes the partial results table
 120             shifter = Signal(shiftbits, name="shifter%d" % i,
 121                           reset_less=True)
 122             comb += shifter.eq(element)
 123             partial = Signal(width, name="partial%d" % i, reset_less=True)
 124             comb += partial.eq(a_intervals[i] << shifter)
 125
 126             partial_results.append(partial)
 127
 128         out = []
 129
 130         # This calculates the outputs o0-o3 from the partial results
 131         # table above.
 132         s,e = intervals[0]
 133         result = partial_results[0]
 134         out.append(result[s:e])
 135         for i in range(1, len(keys)):
 136             start, end = (intervals[i][0], width)
 137             result = partial_results[i] | \
 138                 Mux(gates[i-1], 0, result[intervals[0][1]:])[:end-start]
 139             print("select: [%d:%d]" % (start, end))
 140             res = Signal(width, name="res%d" % i, reset_less=True)
 141             comb += res.eq(result)
 142             s,e = intervals[0]
 143             out.append(res[s:e])
 144
 145         comb += self.output.eq(Cat(*out))
 146
 147         return m
 148