use "Mask" class which is more gate-efficient than (1<<x)-1
[soc.git] / src / soc / fu / shift_rot / rotator.py
1 # Manual translation and adaptation of rotator.vhdl from microwatt into nmigen
2 #
3 from nmigen.compat.sim import run_simulation
4
5 from nmigen import (Elaboratable, Signal, Module, Const, Cat, Repl,
6 unsigned, signed)
7 from soc.fu.shift_rot.rotl import ROTL
8 from nmigen.back.pysim import Settle
9 from nmutil.extend import exts
10 from nmutil.mask import Mask
11
12
13 # note BE bit numbering
def right_mask(m, mask_begin):
    """Return a 64-bit signal whose low ``64 - mask_begin`` bits are set.

    The combinatorial assignments driving the signal are added to ``m``.
    When ``mask_begin`` is greater than 64 the mask is all zeros.

    m          -- Module that receives the comb statements
    mask_begin -- index of the first mask bit (BE bit numbering, per the
                  note above this function); assumed to be an nmigen value
    """
    comb = m.d.comb
    mask = Signal(64, name="right_mask", reset_less=True)
    with m.If(mask_begin > 64):
        # out of range: nothing selected
        comb += mask.eq(0)
    with m.Else():
        # (1 << k) - 1 sets the k least-significant bits
        comb += mask.eq((1 << (64 - mask_begin)) - 1)
    return mask
21
22
def left_mask(m, mask_end):
    """Return a 64-bit signal with the low ``63 - mask_end`` bits cleared.

    All bits above those are set. The combinatorial assignment driving the
    signal is added to ``m``.

    m        -- Module that receives the comb statement
    mask_end -- index of the last mask bit; assumed to be an nmigen value
                in the range 0..63 (TODO confirm behaviour outside it)
    """
    mask = Signal(64, name="left_mask", reset_less=True)
    # low bits that must NOT be part of the mask
    low_bits = (1 << (63 - mask_end)) - 1
    m.d.comb += mask.eq(~low_bits)
    return mask
27
28
class Rotator(Elaboratable):
    """Rotate-and-mask datapath shared by several POWER9 shift/rotate ops.

    Instruction groups handled:

    * sl[wd]
    * rlw*, rldic, rldicr, rldimi
    * rldicl, sr[wd]
    * sra[wd][i]

    How to drive the control inputs:

    * shift = RB[0:7]
    * arith = 1 when is_signed
    * right_shift = 1 when insn_type is OP_SHR
    * clear_left = 1 when insn_type is OP_RLC or OP_RLCL
    * clear_right = 1 when insn_type is OP_RLC or OP_RLCR

    The rotated value is combined with ``ra`` under a left/right mask pair;
    which combination is used depends on an internally derived 2-bit
    output mode (see ``elaborate``).
    """

    def __init__(self):
        # ---- inputs ----
        self.me = Signal(5, reset_less=True)  # ME field
        self.mb = Signal(5, reset_less=True)  # MB field
        # additional (sixth) mb bit, present in MD-form
        self.mb_extra = Signal(1, reset_less=True)
        self.ra = Signal(64, reset_less=True)  # RA
        self.rs = Signal(64, reset_less=True)  # RS
        self.shift = Signal(7, reset_less=True)  # RB[0:7]
        self.is_32bit = Signal(reset_less=True)
        self.right_shift = Signal(reset_less=True)
        self.arith = Signal(reset_less=True)
        self.clear_left = Signal(reset_less=True)
        self.clear_right = Signal(reset_less=True)
        self.sign_ext_rs = Signal(reset_less=True)
        # ---- outputs ----
        self.result_o = Signal(64, reset_less=True)
        self.carry_out_o = Signal(reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb
        ra, rs = self.ra, self.rs
        is_32bit = self.is_32bit

        # Internal signals.  NOTE: nmigen derives each signal's name from
        # the variable it is assigned to, so these names are kept stable.
        rot_count = Signal(6, reset_less=True)
        rot = Signal(64, reset_less=True)
        sh = Signal(7, reset_less=True)
        mb = Signal(7, reset_less=True)
        me = Signal(7, reset_less=True)
        mr = Signal(64, reset_less=True)
        ml = Signal(64, reset_less=True)
        output_mode = Signal(2, reset_less=True)
        hi32 = Signal(32, reset_less=True)
        repl32 = Signal(64, reset_less=True)

        # ---- rotator input ----
        # Upper word of the value to rotate:
        #   32-bit op   -> copy of the low word (both halves identical)
        #   sign_ext_rs -> 32 copies of rs bit 31
        #   otherwise   -> upper word of rs, untouched
        with m.If(is_32bit):
            comb += hi32.eq(rs[0:32])
        with m.Elif(self.sign_ext_rs):
            comb += hi32.eq(Repl(rs[31], 32))
        with m.Else():
            comb += hi32.eq(rs[32:64])
        comb += repl32.eq(Cat(rs[0:32], hi32))

        # ---- rotate amount ----
        shift_signed = Signal(signed(6))
        comb += shift_signed.eq(self.shift[0:6])

        # By default rotate left by the low six shift bits; a right shift
        # is performed as a left rotate by the negated count (the later
        # assignment takes priority when right_shift is set).
        comb += rot_count.eq(self.shift[0:6])
        with m.If(self.right_shift):
            comb += rot_count.eq(-shift_signed)

        # ---- ROTL submodule ----
        m.submodules.rotl = rotl = ROTL(64)
        comb += [
            rotl.a.eq(repl32),
            rotl.b.eq(rot_count),
            rot.eq(rotl.o),
        ]

        # Shift count used for mask generation: bit 6 is forced to zero
        # for 32-bit operations, leaving a 6-bit count.
        sh_top = self.shift[6] & ~is_32bit
        comb += sh.eq(Cat(self.shift[0:6], sh_top))

        # XXX errr... we should already have these, in Fields? oh well
        # Mask begin/end indexes follow (caution: big-endian bit numbering)

        # ---- mask begin (mb) ----
        with m.If(self.clear_left):
            # taken from the instruction's MB field
            with m.If(is_32bit):
                # upper bits fixed at 0b01
                comb += mb.eq(Cat(self.mb, Const(0b01, 2)))
            with m.Else():
                # sixth bit comes from the MD-form extra bit
                comb += mb.eq(Cat(self.mb, self.mb_extra, Const(0b0, 1)))
        with m.Elif(self.right_shift):
            # this is basically mb = sh + (is_32bit? 32: 0);
            with m.If(is_32bit):
                comb += mb.eq(Cat(sh[0:5], ~sh[5], sh[5]))
            with m.Else():
                comb += mb.eq(sh)
        with m.Else():
            # 32 for 32-bit operations, 0 otherwise
            comb += mb.eq(Cat(Const(0b0, 5), is_32bit, Const(0b0, 1)))

        # ---- mask end (me) ----
        with m.If(self.clear_right & is_32bit):
            # TODO: track down where this is. have to use fields.
            comb += me.eq(Cat(self.me, Const(0b01, 2)))
        with m.Elif(self.clear_right & ~self.clear_left):
            # this is me, have to use fields
            comb += me.eq(Cat(self.mb, self.mb_extra, Const(0b0, 1)))
        with m.Else():
            # effectively, 63 - sh
            comb += me.eq(Cat(~sh[0:6], sh[6]))

        # ---- left and right masks ----
        # Mask replaces the earlier (1 << x) - 1 helper functions; it is
        # presumed to set the low `shift` bits of `mask` -- confirm against
        # nmutil.mask.
        m.submodules.right_mask = mask_r = Mask(64)
        with m.If(mb <= 64):
            comb += mask_r.shift.eq(64 - mb)
            comb += mr.eq(mask_r.mask)
        with m.Else():
            comb += mr.eq(0)

        # NOTE(review): me is 7 bits wide, so 63 - me can go negative when
        # me >= 64; the result then depends on the width of Mask.shift --
        # verify against nmutil.mask.
        m.submodules.left_mask = mask_l = Mask(64)
        comb += mask_l.shift.eq(63 - me)
        comb += ml.eq(~mask_l.mask)

        # ---- output mode ----
        # 00 for sl[wd]
        # 0w for rlw*, rldic, rldicr, rldimi, where w = 1 iff mb > me
        # 10 for rldicl, sr[wd]
        # 1z for sra[wd][i], z = 1 if rs is negative
        high_mode = (self.clear_left & ~self.clear_right) | self.right_shift
        with m.If(high_mode):
            comb += output_mode.eq(Cat(self.arith & repl32[63], Const(1, 1)))
        with m.Else():
            # only the low six bits of mb/me take part in the comparison
            wrapped = self.clear_right & (mb[0:6] > me[0:6])
            comb += output_mode.eq(Cat(wrapped, Const(0, 1)))

        # ---- result: merge rotated input with ra under the masks ----
        both = mr & ml
        either = mr | ml
        with m.Switch(output_mode):
            with m.Case(0b00):
                # keep rot where both masks are set, ra elsewhere
                comb += self.result_o.eq((rot & both) | (ra & ~both))
            with m.Case(0b01):
                # keep rot where either mask is set, ra elsewhere
                comb += self.result_o.eq((rot & either) | (ra & ~either))
            with m.Case(0b10):
                # zero-fill outside the right mask
                comb += self.result_o.eq(rot & mr)
            with m.Case(0b11):
                # one-fill outside the right mask
                comb += self.result_o.eq(rot | ~mr)

        # Carry output (for arithmetic shift right of a negative value):
        # set when any rs bit outside the left mask is non-zero.
        comb += self.carry_out_o.eq((rs & ~ml).bool())

        return m
181
182
if __name__ == '__main__':
    # Stand-alone check of left_mask(): step mask_end from 63 down to 0,
    # printing the step number and the resulting mask in hex.

    m = Module()
    comb = m.d.comb
    mr = Signal(64)
    mb = Signal(6)
    comb += mr.eq(left_mask(m, mb))

    def sweep():
        for mask_end in reversed(range(64)):
            yield mb.eq(mask_end)
            yield Settle()
            value = yield mr
            # first column counts up from 0 while mask_end counts down
            print(63 - mask_end, hex(value))

    run_simulation(m, [sweep()], vcd_name="test_mask.vcd")