all signals must be reset_less
[ieee754fpu.git] / src / ieee754 / part_shift / part_shift_scalar.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3
4 """
5 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
6
7 dynamically partitionable shifter. Only the operand to be shifted can
8 be partitioned, the amount to shift by *must* be a scalar
9
10 See:
11
12 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
13 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
14 """
15 from nmigen import Signal, Module, Elaboratable, Cat, Mux
16 from ieee754.part_mul_add.partpoints import PartitionPoints
17 from ieee754.part_shift.part_shift_dynamic import ShifterMask
18 import math
19
20
class PartitionedScalarShift(Elaboratable):
    """Left shifter with a partitionable data operand and scalar shift amount.

    ``data`` is split into partitions at ``partition_points``; the single
    ``shifter`` amount is applied to every partition.  When the gate between
    two neighbouring partitions is clear, bits shifted out of the lower
    partition are merged into the upper one; when the gate is set they are
    dropped from the upper partition's result.

    Attributes:
        width: total bit width of ``data`` and ``output``.
        partition_points: ``PartitionPoints`` built from the constructor
            argument; its keys are used as the partition boundaries.
        data: input Signal (``width`` bits), the value to be shifted.
        shiftbits: ``ceil(log2(width))``, the width of ``shifter``.
        shifter: input Signal (``shiftbits`` bits), the shift amount.
        output: output Signal (``width`` bits), the shifted result.
    """

    def __init__(self, width, partition_points):
        """Create the input/output signals.

        Args:
            width: bit width of the data operand and of the result.
            partition_points: passed unchanged to ``PartitionPoints``.
        """
        self.width = width
        self.partition_points = PartitionPoints(partition_points)

        self.data = Signal(width, reset_less=True)
        self.shiftbits = math.ceil(math.log2(width))
        self.shifter = Signal(self.shiftbits, reset_less=True)
        self.output = Signal(width, reset_less=True)

    def elaborate(self, platform):
        """Build and return the combinatorial shifter Module."""
        m = Module()
        comb = m.d.comb
        width = self.width
        # number of partition boundaries (one fewer than the partition count)
        pwid = self.partition_points.get_max_partition_count(width)-1
        shiftbits = self.shiftbits
        gates = self.partition_points.as_sig()

        # (start, end) bit range of each partition; the last one ends at width
        # NOTE(review): assumes the partition point keys iterate in
        # ascending order - confirm against PartitionPoints.
        intervals = []
        start = 0
        for end in list(self.partition_points.keys()) + [width]:
            intervals.append((start, end))
            start = end  # the next partition begins where this one ended

        # Per-partition mask applied to the shift amount.
        # NOTE(review): ShifterMask is defined elsewhere; presumably it
        # limits the usable shift bits according to which of the gates
        # above this partition are set - confirm in part_shift_dynamic.
        first_start, first_end = intervals[0]
        min_bits = math.ceil(math.log2(first_end - first_start))
        shifter_masks = []
        for i, (start, _) in enumerate(intervals):
            sm_mask = Signal(shiftbits, name="sm_mask%d" % i, reset_less=True)
            if pwid-i != 0:
                max_bits = math.ceil(math.log2(width-start))
                sm = ShifterMask(pwid-i, shiftbits, max_bits, min_bits)
                comb += sm.gates.eq(gates[i:pwid])
                comb += sm_mask.eq(sm.mask)
                setattr(m.submodules, "sm%d" % i, sm)
            else:
                # top partition: no gates remain above it.  A zero-width
                # ShifterMask seemed to give the proof issues, so use a
                # constant mask of min_bits ones instead.
                comb += sm_mask.eq((1 << min_bits)-1)

            if i == 0:
                shifter_masks.append(sm_mask)
                continue

            # gate below this partition set: use this partition's own mask;
            # gate clear: inherit the mask selected for the partition below.
            shifter_mask = Signal(shiftbits, name="shifter_mask%d" % i,
                                  reset_less=True)
            comb += shifter_mask.eq(Mux(gates[i-1],
                                        sm_mask,
                                        shifter_masks[i-1]))
            shifter_masks.append(shifter_mask)

        # Shift each partition by its masked amount, then merge in whatever
        # was carried up from the partitions below.
        prev = None
        for i, (start, end) in enumerate(intervals):
            amount = Signal(self.shifter.width, name="shifter%d" % i,
                            reset_less=True)
            comb += amount.eq(self.shifter & shifter_masks[i])

            # full-width so that bits shifted past `end` land in the
            # positions of the partitions above; bits below `start` are
            # never assigned.
            sp = Signal(width, name="sp%d" % i, reset_less=True)
            comb += sp[start:].eq(self.data[start:end] << amount)

            if i == 0:
                intermed = sp
            else:
                # a set gate blocks the carry-in from the lower partitions
                intermed = sp | Mux(gates[i-1], 0, prev)
            comb += self.output[start:end].eq(intermed[start:end])
            prev = intermed

        return m