add inner and outer yield version of DCT inner and out butterfly
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 import math
13
14 # bits of the integer 'val'.
15 def reverse_bits(val, width):
16 result = 0
17 for _ in range(width):
18 result = (result << 1) | (val & 1)
19 val >>= 1
20 return result
21
22
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec, pre_rev=True):
31 res = []
32 for i in range(len(vec)):
33 if pre_rev:
34 res.append(i ^ (i>>1))
35 else:
36 ri = i
37 bl = i.bit_length()
38 for ji in range(1, bl):
39 ri ^= (i >> ji)
40 res.append(vec[ri])
41 return res
42
43
44 # python "yield" can be iterated. use this to make it clear how
45 # the indices are generated by using natural-looking nested loops
46 def iterate_dct_butterfly_indices(SVSHAPE):
47 # get indices to iterate over, in the required order
48 n = SVSHAPE.lims[0]
49 # createing lists of indices to iterate over in each dimension
50 # has to be done dynamically, because it depends on the size
51 # first, the size-based loop (which can be done statically)
52 x_r = []
53 size = 2
54 while size <= n:
55 x_r.append(size)
56 size *= 2
57 # invert order if requested
58 if SVSHAPE.invxyz[0]:
59 x_r.reverse()
60
61 if len(x_r) == 0:
62 return
63
64 # reference (read/write) the in-place data in *reverse-bit-order*
65 ri = list(range(n))
66 if SVSHAPE.mode == 0b01:
67 levels = n.bit_length() - 1
68 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
69
70 # reference list for not needing to do data-swaps, just swap what
71 # *indices* are referenced (two levels of indirection at the moment)
72 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
73 ji = list(range(n))
74 inplace_mode = SVSHAPE.mode == 0b01 and SVSHAPE.skip not in [0b10, 0b11]
75 if inplace_mode:
76 ji = halfrev2(ji, True)
77
78 # start an infinite (wrapping) loop
79 skip = 0
80 while True:
81 for size in x_r: # loop over 3rd order dimension (size)
82 x_end = size == x_r[-1]
83 # y_r schedule depends on size
84 halfsize = size // 2
85 y_r = []
86 for i in range(0, n, size):
87 y_r.append(i)
88 # invert if requested
89 if SVSHAPE.invxyz[1]: y_r.reverse()
90 for i in y_r: # loop over 2nd order dimension
91 y_end = i == y_r[-1]
92 k_r = []
93 j_r = []
94 k = 0
95 for j in range(i, i+halfsize):
96 k_r.append(k)
97 j_r.append(j)
98 # invert if requested
99 if SVSHAPE.invxyz[2]: k_r.reverse()
100 if SVSHAPE.invxyz[2]: j_r.reverse()
101 hz2 = halfsize // 2 # zero stops reversing 1-item lists
102 # if you *really* want to do the in-place swapping manually,
103 # this allows you to do it. good luck...
104 if SVSHAPE.mode == 0b01 and not inplace_mode:
105 jr = j_r[:hz2]
106 for j in j_r: # loop over 1st order dimension
107 z_end = j == j_r[-1]
108 # now depending on MODE return the index
109 if SVSHAPE.skip in [0b00, 0b10]:
110 result = ri[ji[j]] # lower half
111 elif SVSHAPE.skip in [0b01, 0b11]:
112 result = ri[ji[size-j-1]] # upper half, reverse order
113 loopends = (z_end |
114 ((y_end and z_end)<<1) |
115 ((y_end and x_end and z_end)<<2))
116
117 yield result + SVSHAPE.offset, loopends
118
119 # now in-place swap
120 if SVSHAPE.mode == 0b01 and inplace_mode:
121 for ci, jl in enumerate(j_r[:hz2]):
122 jh = size-jl-1 # upper half, reverse order
123 jlh = jl+halfsize
124 tmp1, tmp2 = ji[jlh], ji[jh]
125 ji[jlh], ji[jh] = tmp2, tmp1
126
127
128 def pprint_schedule(schedule, n):
129 size = 2
130 idx = 0
131 while size <= n:
132 halfsize = size // 2
133 tablestep = n // size
134 print ("size %d halfsize %d tablestep %d" % \
135 (size, halfsize, tablestep))
136 for i in range(0, n, size):
137 prefix = "i %d\t" % i
138 for j in range(i, i + halfsize):
139 (jl, je), (jh, he) = schedule[idx]
140 print (" %-3d\t%s j=%-2d jh=%-2d "
141 "j[jl=%-2d] j[jh=%-2d]" % \
142 (idx, prefix, j, j+halfsize,
143 jl, jh,
144 ),
145 "end", bin(je)[2:], bin(je)[2:])
146 idx += 1
147 size *= 2
148
149
150 def demo():
151 # set the dimension sizes here
152 xdim = 8
153 ydim = 0 # not needed
154 zdim = 0 # again, not needed
155
156 # set total. err don't know how to calculate how many there are...
157 # do it manually for now
158 VL = 0
159 size = 2
160 n = xdim
161 while size <= n:
162 halfsize = size // 2
163 tablestep = n // size
164 for i in range(0, n, size):
165 for j in range(i, i + halfsize):
166 VL += 1
167 size *= 2
168
169 ################
170 # INNER butterfly
171 ################
172
173 # set up an SVSHAPE
174 class SVSHAPE:
175 pass
176 # j schedule
177 SVSHAPE0 = SVSHAPE()
178 SVSHAPE0.lims = [xdim, ydim, zdim]
179 SVSHAPE0.order = [0,1,2] # experiment with different permutations, here
180 SVSHAPE0.mode = 0b01
181 SVSHAPE0.skip = 0b00
182 SVSHAPE0.offset = 0 # experiment with different offset, here
183 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
184 # j+halfstep schedule
185 SVSHAPE1 = SVSHAPE()
186 SVSHAPE1.lims = [xdim, ydim, zdim]
187 SVSHAPE1.order = [0,1,2] # experiment with different permutations, here
188 SVSHAPE1.mode = 0b01
189 SVSHAPE1.skip = 0b01
190 SVSHAPE1.offset = 0 # experiment with different offset, here
191 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
192
193 # enumerate over the iterator function, getting new indices
194 schedule = []
195 i0 = iterate_dct_butterfly_indices(SVSHAPE0)
196 i1 = iterate_dct_butterfly_indices(SVSHAPE1)
197 for idx, (jl, jh) in enumerate(zip(i0, i1)):
198 if idx >= VL:
199 break
200 schedule.append((jl, jh))
201
202 # ok now pretty-print the results, with some debug output
203 print ("inner butterfly")
204 pprint_schedule(schedule, n)
205 print ("")
206
207 ################
208 # outer butterfly
209 ################
210
211 # j schedule
212 SVSHAPE0 = SVSHAPE()
213 SVSHAPE0.lims = [xdim, ydim, zdim]
214 SVSHAPE0.order = [0,1,2] # experiment with different permutations, here
215 SVSHAPE0.mode = 0b10
216 SVSHAPE0.skip = 0b00
217 SVSHAPE0.offset = 0 # experiment with different offset, here
218 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
219 # j+halfstep schedule
220 SVSHAPE1 = SVSHAPE()
221 SVSHAPE1.lims = [xdim, ydim, zdim]
222 SVSHAPE1.order = [0,1,2] # experiment with different permutations, here
223 SVSHAPE1.mode = 0b10
224 SVSHAPE1.skip = 0b01
225 SVSHAPE1.offset = 0 # experiment with different offset, here
226 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
227
228 # enumerate over the iterator function, getting new indices
229 schedule = []
230 i0 = iterate_dct_butterfly_indices(SVSHAPE0)
231 i1 = iterate_dct_butterfly_indices(SVSHAPE1)
232 for idx, (jl, jh) in enumerate(zip(i0, i1)):
233 if idx >= VL:
234 break
235 schedule.append((jl, jh))
236
237 # ok now pretty-print the results, with some debug output
238 print ("outer butterfly")
239 pprint_schedule(schedule, n)
240
241 # run the demo
242 if __name__ == '__main__':
243 demo()