43d16bdb5fb21c352c86a6e3a8970a1c9d1a8e49
[ieee754fpu.git] / src / ieee754 / part_shift / part_shift_scalar.py
1 # SPDX-License-Identifier: LGPL-2.1-or-later
2 # See Notices.txt for copyright information
3
4 """
5 Copyright (C) 2020 Michael Nolan <mtnolan2640@gmail.com>
6
7 Dynamically partitionable shifter. Only the operand to be shifted can
8 be partitioned; the amount to shift by *must* be a scalar.
9
10 See:
11
12 * http://libre-riscv.org/3d_gpu/architecture/dynamic_simd/shift/
13 * http://bugs.libre-riscv.org/show_bug.cgi?id=173
14 """
15 from nmigen import Signal, Module, Elaboratable, Cat, Mux
16 from ieee754.part_mul_add.partpoints import PartitionPoints
17 from ieee754.part_shift.part_shift_dynamic import ShifterMask
18 import math
19
20
class PartitionedScalarShift(Elaboratable):
    """Dynamically partitionable left-shifter with a scalar shift amount.

    Only the operand (``data``) is partitioned; the shift amount
    (``shifter``) is a single scalar applied to every partition.

    Attributes:
        width: total bit width of ``data`` and ``output``.
        partition_points: ``PartitionPoints`` built from the constructor
            argument; its keys are used as partition boundaries and its
            values (via ``as_sig()``) as the gate bits.
        data: input ``Signal(width)`` holding the value to be shifted.
        shiftbits: ``ceil(log2(width))``, the width of ``shifter``.
        shifter: input ``Signal(shiftbits)`` holding the shift amount.
        output: output ``Signal(width)`` holding the shifted result.
    """

    def __init__(self, width, partition_points):
        """Create the shifter's ports.

        Args:
            width: bit width of the operand and of the result.
            partition_points: passed straight to ``PartitionPoints``.
        """
        self.width = width
        self.partition_points = PartitionPoints(partition_points)

        self.data = Signal(width)
        self.shiftbits = math.ceil(math.log2(width))
        self.shifter = Signal(self.shiftbits)
        self.output = Signal(width)

    def elaborate(self, platform):
        """Build and return the combinatorial shifter ``Module``."""
        m = Module()
        comb = m.d.comb
        width = self.width
        # number of partition points (one fewer than the partition count)
        pwid = self.partition_points.get_max_partition_count(width) - 1
        shiftbits = self.shiftbits
        gates = self.partition_points.as_sig()

        # (start, end) bit range of each partition, in key order; the
        # last partition runs up to the full width.
        intervals = []
        start = 0
        for end in list(self.partition_points.keys()) + [width]:
            intervals.append((start, end))
            start = end  # for next time round loop

        # One shift-amount mask per partition.
        # NOTE(review): ShifterMask is presumably what limits the shift
        # amount according to which gates are set -- confirm against
        # part_shift_dynamic.ShifterMask.
        first_start, first_end = intervals[0]
        min_bits = math.ceil(math.log2(first_end - first_start))
        shifter_masks = []
        for i, (start, _end) in enumerate(intervals):
            max_bits = math.ceil(math.log2(width - start))
            if pwid - i != 0:
                sm = ShifterMask(pwid - i, shiftbits, max_bits, min_bits)
                setattr(m.submodules, "sm%d" % i, sm)
                comb += sm.gates.eq(gates[i:pwid])
                mask = Signal(shiftbits, name="sm_mask%d" % i)
                comb += mask.eq(sm.mask)
                shifter_masks.append(mask)
            else:
                # having a 0 width signal seems to give the proof issues,
                # so use a constant mask instead of a ShifterMask here
                shifter_masks.append((1 << min_bits) - 1)

        # Shift each partition's slice of the input by the masked amount
        # and place the result at the partition's own bit offset.
        shiftparts = []
        for i, (start, end) in enumerate(intervals):
            sp = Signal(width, name="sp%d" % i)
            _shifter = Signal(self.shifter.width, name="shifter%d" % i)
            comb += _shifter.eq(self.shifter & shifter_masks[i])
            comb += sp[start:].eq(self.data[start:end] << _shifter)
            shiftparts.append(sp)

        # Combine the per-partition results.  Bits accumulated from the
        # lower partitions are ORed in unless the gate between this
        # partition and the previous one is set, in which case they are
        # replaced by zero.
        prev = None
        for i, (start, end) in enumerate(intervals):
            if i == 0:
                intermed = shiftparts[i]
            else:
                intermed = shiftparts[i] | Mux(gates[i - 1], 0, prev)
            comb += self.output[start:end].eq(intermed[start:end])
            prev = intermed

        return m