src/ieee754/part_shift/part_shift_dynamic.py

   1 # SPDX-License-Identifier: LGPL-2.1-or-later
   2 # See Notices.txt for copyright information
   3
   4 """
   5 Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   6 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
   7
   8 dynamically partitionable shifter. Unlike part_shift_scalar, both
   9 operands can be partitioned
  10
  11 See:
  12
  13 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
  14 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
  15 """
  16 from nmigen import Signal, Module, Elaboratable, Cat, Mux, C
  17 from ieee754.part_mul_add.partpoints import PartitionPoints
  18 import math
  19
  20
  21 class PartitionedDynamicShift(Elaboratable):
  22     def __init__(self, width, partition_points):
  23         self.width = width
  24         self.partition_points = PartitionPoints(partition_points)
  25
  26         self.a = Signal(width)
  27         self.b = Signal(width)
  28         self.output = Signal(width)
  29
  30     def elaborate(self, platform):
  31         m = Module()
  32         comb = m.d.comb
  33         width = self.width
  34         gates = Signal(self.partition_points.get_max_partition_count(width)-1)
  35         comb += gates.eq(self.partition_points.as_sig())
  36
  37         matrix = []
  38         keys = list(self.partition_points.keys()) + [self.width]
  39         start = 0
  40
  41         # break out both the input and output into partition-stratified blocks
  42         a_intervals = []
  43         b_intervals = []
  44         intervals = []
  45         widths = []
  46         start = 0
  47         for i in range(len(keys)):
  48             end = keys[i]
  49             widths.append(width - start)
  50             a_intervals.append(self.a[start:end])
  51             b_intervals.append(self.b[start:end])
  52             intervals.append([start,end])
  53             start = end
  54
  55         min_bits = math.ceil(math.log2(intervals[0][1] - intervals[0][0]))
  56         max_bits = math.ceil(math.log2(width))
  57
  58         # shifts are normally done as (e.g. for 32 bit) result = a & (b&0b11111)
  59         # truncating the b input.  however here of course the size of the
  60         # partition varies dynamically.
  61         shifter_masks = []
  62         for i in range(len(b_intervals)):
  63             mask = Signal(b_intervals[i].shape(), name="shift_mask%d" % i)
  64             bits = []
  65             for j in range(i, gates.width):
  66                 if bits:
  67                     bits.append(~gates[j] & bits[-1])
  68                 else:
  69                     bits.append(~gates[j])
  70             comb += mask.eq(Cat((1 << min_bits)-1, bits)
  71                             & ((1 << max_bits)-1))
  72             shifter_masks.append(mask)
  73
  74         print(shifter_masks)
  75
  76         # Instead of generating the matrix described in the wiki, I
  77         # instead calculate the shift amounts for each partition, then
  78         # calculate the partial results of each partition << shift
  79         # amount. On the wiki, the following table is given for output #3:
  80         # p2p1p0 | o3
  81         # 0 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b0[7:0]
  82         # 0 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b1[7:0]
  83         # 0 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b2[7:0]
  84         # 0 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b2[7:0]
  85         # 1 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b3[7:0]
  86         # 1 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b3[7:0]
  87         # 1 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b3[7:0]
  88         # 1 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b3[7:0]
  89
  90         # Each output for o3 is given by a3bx and the partial results
  91         # for o2 (namely, a2bx, a1bx, and a0b0). If I calculate the
  92         # partial results [a0b0, a1bx, a2bx, a3bx], I can use just
  93         # those partial results to calculate a0, a1, a2, and a3
  94         shiftbits = math.ceil(math.log2(width))
  95         element = b_intervals[0] & shifter_masks[0]
  96         partial_results = []
  97         partial_results.append(a_intervals[0] << element)
  98         for i in range(1, len(keys)):
  99             s, e = intervals[i]
 100             masked = Signal(b_intervals[i].shape(), name="masked%d" % i)
 101             comb += masked.eq(b_intervals[i] & shifter_masks[i])
 102             element = Mux(gates[i-1], masked, element)
 103
 104             # This calculates which partition of b to select the
 105             # shifter from. According to the table above, the
 106             # partition to select is given by the highest set bit in
 107             # the partition mask, this calculates that with a mux
 108             # chain
 109
 110             # This computes the partial results table
 111             shifter = Signal(shiftbits, name="shifter%d" % i)
 112             comb += shifter.eq(element)
 113             partial = Signal(width, name="partial%d" % i)
 114             comb += partial.eq(a_intervals[i] << shifter)
 115
 116             partial_results.append(partial)
 117
 118         out = []
 119
 120         # This calculates the outputs o0-o3 from the partial results
 121         # table above.
 122         s,e = intervals[0]
 123         result = partial_results[0]
 124         out.append(result[s:e])
 125         for i in range(1, len(keys)):
 126             start, end = (intervals[i][0], width)
 127             result = partial_results[i] | \
 128                 Mux(gates[i-1], 0, result[intervals[0][1]:])[:end-start]
 129             print("select: [%d:%d]" % (start, end))
 130             res = Signal(width, name="res%d" % i)
 131             comb += res.eq(result)
 132             s,e = intervals[0]
 133             out.append(res[s:e])
 134
 135         comb += self.output.eq(Cat(*out))
 136
 137         return m
 138