src/ieee754/part_shift/part_shift_dynamic.py

   1 # SPDX-License-Identifier: LGPL-2.1-or-later
   2 # See Notices.txt for copyright information
   3
   4 """
   5 Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
   6 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
   7
   8 dynamically partitionable shifter. Unlike part_shift_scalar, both
   9 operands can be partitioned
  10
  11 See:
  12
  13 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
  14 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
  15 """
  16 from nmigen import Signal, Module, Elaboratable, Cat, Mux, C
  17 from ieee754.part_mul_add.partpoints import PartitionPoints
  18 import math
  19
  20
  21 class PartitionedDynamicShift(Elaboratable):
  22     def __init__(self, width, partition_points):
  23         self.width = width
  24         self.partition_points = PartitionPoints(partition_points)
  25
  26         self.a = Signal(width)
  27         self.b = Signal(width)
  28         self.output = Signal(width)
  29
  30     def elaborate(self, platform):
  31         m = Module()
  32         comb = m.d.comb
  33         width = self.width
  34         gates = Signal(self.partition_points.get_max_partition_count(width)-1)
  35         comb += gates.eq(self.partition_points.as_sig())
  36
  37         matrix = []
  38         keys = list(self.partition_points.keys()) + [self.width]
  39         start = 0
  40
  41
  42         # break out both the input and output into partition-stratified blocks
  43         a_intervals = []
  44         b_intervals = []
  45         out_intervals = []
  46         intervals = []
  47         widths = []
  48         start = 0
  49         for i in range(len(keys)):
  50             end = keys[i]
  51             widths.append(width - start)
  52             a_intervals.append(self.a[start:end])
  53             b_intervals.append(self.b[start:end])
  54             out_intervals.append(self.output[start:end])
  55             intervals.append([start,end])
  56             start = end
  57
  58         # Instead of generating the matrix described in the wiki, I
  59         # instead calculate the shift amounts for each partition, then
  60         # calculate the partial results of each partition << shift
  61         # amount. On the wiki, the following table is given for output #3:
  62         # p2p1p0 | o3
  63         # 0 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b0[7:0]
  64         # 0 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b1[7:0]
  65         # 0 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b2[7:0]
  66         # 0 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b2[7:0]
  67         # 1 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b3[7:0]
  68         # 1 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b3[7:0]
  69         # 1 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b3[7:0]
  70         # 1 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b3[7:0]
  71
  72         # Each output for o3 is given by a3bx and the partial results
  73         # for o2 (namely, a2bx, a1bx, and a0b0). If I calculate the
  74         # partial results [a0b0, a1bx, a2bx, a3bx], I can use just
  75         # those partial results to calculate a0, a1, a2, and a3
  76         partial_results = []
  77         partial_results.append(a_intervals[0] << b_intervals[0])
  78         element = b_intervals[0]
  79         for i in range(1, len(out_intervals)):
  80             s, e = intervals[i]
  81             element = Mux(gates[i-1], b_intervals[i], element)
  82
  83             # This calculates which partition of b to select the
  84             # shifter from. According to the table above, the
  85             # partition to select is given by the highest set bit in
  86             # the partition mask, this calculates that with a mux
  87             # chain
  88
  89
  90             # This computes the partial results table
  91             shifter = Signal(8, name="shifter%d" % i)
  92             comb += shifter.eq(element)
  93             partial = Signal(width, name="partial%d" % i)
  94             comb += partial.eq(a_intervals[i] << shifter)
  95
  96             partial_results.append(partial)
  97
  98         out = []
  99
 100         # This calculates the outputs o0-o3 from the partial results
 101         # table above.
 102         s,e = intervals[0]
 103         result = partial_results[0]
 104         out.append(result[s:e])
 105         for i in range(1, len(out_intervals)):
 106             start, end = (intervals[i][0], width)
 107             result = partial_results[i] | \
 108                 Mux(gates[i-1], 0, result[intervals[0][1]:])[:end-start]
 109             print("select: [%d:%d]" % (start, end))
 110             res = Signal(width, name="res%d" % i)
 111             comb += res.eq(result)
 112             s,e = intervals[0]
 113             out.append(res[s:e])
 114
 115         comb += self.output.eq(Cat(*out))
 116
 117         return m
 118