edcb7162ac68f3014acda5b1bf6dcac7d4b41267
[ieee754fpu.git] / src / ieee754 / part_shift / part_shift_dynamic.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3
4 """
5 Copyright (C) 2020 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
6 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
7
8 dynamically partitionable shifter. Unlike part_shift_scalar, both
9 operands can be partitioned
10
11 See:
12
13 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
14 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
15 """
16 from nmigen import Signal, Module, Elaboratable, Cat, Mux, C
17 from ieee754.part_mul_add.partpoints import PartitionPoints
18 import math
19
20
class PartitionedDynamicShift(Elaboratable):
    """Dynamically partitionable left-shifter.

    Both the value being shifted (``a``) and the shift amount (``b``) are
    split at the same partition points.  The partition-point signals
    (collected into ``gates``) decide at run time whether neighbouring
    partitions act independently or are joined into one wider lane:

    * ``gates[i-1]`` set: partition ``i`` takes its shift amount from
      its own slice of ``b`` and receives no carried-in bits from the
      partition below it.
    * ``gates[i-1]`` clear: partition ``i`` re-uses the shift amount
      selected for the partition below it, and the bits shifted out of
      the lower partitions are ORed into its result.

    Attributes:
        width: total bit width of ``a``, ``b`` and ``output``.
        partition_points: ``PartitionPoints`` built from the constructor
            argument.
        a: input Signal, the value to shift.
        b: input Signal, the per-partition shift amounts.
        output: output Signal, the partitioned shift result.

    NOTE(review): every output slice and every carry slice below uses
    the bounds of the *first* partition, so this looks like it assumes
    all partitions have the same width -- confirm against callers.
    """

    def __init__(self, width, partition_points):
        """Create the input/output signals.

        Args:
            width: bit width of ``a``, ``b`` and ``output``.
            partition_points: passed straight to ``PartitionPoints``.
        """
        self.width = width
        self.partition_points = PartitionPoints(partition_points)

        self.a = Signal(width)
        self.b = Signal(width)
        self.output = Signal(width)

    def elaborate(self, platform):
        """Build and return the combinatorial Module for the shifter.

        Args:
            platform: not used by this implementation.

        Returns:
            The ``Module`` driving ``self.output``.
        """
        m = Module()
        comb = m.d.comb
        width = self.width
        # NOTE: the local is deliberately called "gates"; a Signal
        # created without an explicit name= picks its name up from the
        # variable it is assigned to.
        gates = Signal(self.partition_points.get_max_partition_count(width)-1)
        comb += gates.eq(self.partition_points.as_sig())

        # Upper bound of every partition; the last one ends at `width`.
        keys = list(self.partition_points.keys()) + [self.width]

        # break out both inputs into partition-stratified blocks
        a_intervals = []
        b_intervals = []
        intervals = []
        start = 0
        for end in keys:
            a_intervals.append(self.a[start:end])
            b_intervals.append(self.b[start:end])
            intervals.append([start, end])
            start = end

        # Instead of generating the matrix described in the wiki, I
        # calculate the shift amounts for each partition, then
        # calculate the partial results of each partition << shift
        # amount. On the wiki, the following table is given for output #3:
        # p2p1p0 | o3
        # 0 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b0[7:0]
        # 0 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b1[7:0]
        # 0 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b2[7:0]
        # 0 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b2[7:0]
        # 1 0 0  | a0b0[31:24] | a1b0[23:16] | a2b0[15:8] | a3b3[7:0]
        # 1 0 1  | a0b0[31:24] | a1b1[23:16] | a2b1[15:8] | a3b3[7:0]
        # 1 1 0  | a0b0[31:24] | a1b0[23:16] | a2b2[15:8] | a3b3[7:0]
        # 1 1 1  | a0b0[31:24] | a1b1[23:16] | a2b2[15:8] | a3b3[7:0]

        # Each output for o3 is given by a3bx and the partial results
        # for o2 (namely, a2bx, a1bx, and a0b0). If I calculate the
        # partial results [a0b0, a1bx, a2bx, a3bx], I can use just
        # those partial results to calculate the outputs o0, o1, o2
        # and o3.
        partial_results = [a_intervals[0] << b_intervals[0]]
        element = b_intervals[0]
        for i in range(1, len(intervals)):
            # This calculates which partition of b to select the
            # shifter from. According to the table above, the
            # partition to select is given by the highest set bit in
            # the partition mask; this calculates that with a mux
            # chain: keep the previous selection unless this
            # partition's gate is set.
            element = Mux(gates[i-1], b_intervals[i], element)

            # This computes the partial results table.
            # NOTE(review): the shift amount is latched into a fixed
            # 8-bit signal regardless of the partition width -- confirm
            # this is intended for partitions that are not 8 bits wide.
            shifter = Signal(8, name="shifter%d" % i)
            comb += shifter.eq(element)
            partial = Signal(width, name="partial%d" % i)
            comb += partial.eq(a_intervals[i] << shifter)

            partial_results.append(partial)

        # This calculates the outputs o0-o3 from the partial results
        # table above.
        first_start, first_end = intervals[0]
        result = partial_results[0]
        out = [result[first_start:first_end]]
        for i in range(1, len(intervals)):
            # Number of bits from this partition's start up to the top
            # of the word; the carried-in value is trimmed to that.
            remaining = width - intervals[i][0]
            # Bits above the first partition's width in the running
            # result are what spilled out of the lower partition(s).
            # They are ORed in only when the gate below this partition
            # is clear (partitions joined); otherwise they are zeroed.
            result = partial_results[i] | \
                Mux(gates[i-1], 0, result[first_end:])[:remaining]
            res = Signal(width, name="res%d" % i)
            comb += res.eq(result)
            out.append(res[first_start:first_end])

        comb += self.output.eq(Cat(*out))

        return m
118