add REMAP DCT yield schedule function, TODO
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 import math
13
14 # bits of the integer 'val'.
15 def reverse_bits(val, width):
16 result = 0
17 for _ in range(width):
18 result = (result << 1) | (val & 1)
19 val >>= 1
20 return result
21
22
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec, pre_rev=True):
31 res = []
32 for i in range(len(vec)):
33 if pre_rev:
34 res.append(i ^ (i>>1))
35 else:
36 ri = i
37 bl = i.bit_length()
38 for ji in range(1, bl):
39 if (1<<ji) & i:
40 ri ^= ((1<<ji)-1)
41 res.append(vec[ri])
42 return res
43
44
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_butterfly_indices(SVSHAPE):
48 # get indices to iterate over, in the required order
49 n = SVSHAPE.lims[0]
50 # createing lists of indices to iterate over in each dimension
51 # has to be done dynamically, because it depends on the size
52 # first, the size-based loop (which can be done statically)
53 x_r = []
54 size = 2
55 while size <= n:
56 x_r.append(size)
57 size *= 2
58 # invert order if requested
59 if SVSHAPE.invxyz[0]: x_r.reverse()
60
61 if len(x_r) == 0:
62 return
63
64 # reference (read/write) the in-place data in *reverse-bit-order*
65 ri = list(range(n))
66 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
67
68 # reference list for not needing to do data-swaps, just swap what
69 # *indices* are referenced (two levels of indirection at the moment)
70 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
71 ji = list(range(n))
72 inplace_mode = SVSHAPE.skip not in [0b10, 0b11]
73 if inplace_mode:
74 ji = halfrev2(ji, True)
75
76 # start an infinite (wrapping) loop
77 skip = 0
78 while True:
79 for size in x_r: # loop over 3rd order dimension (size)
80 x_end = size == x_r[-1]
81 # y_r schedule depends on size
82 halfsize = size // 2
83 y_r = []
84 for i in range(0, n, size):
85 y_r.append(i)
86 # invert if requested
87 if SVSHAPE.invxyz[1]: y_r.reverse()
88 for i in y_r: # loop over 2nd order dimension
89 y_end = i == y_r[-1]
90 k_r = []
91 j_r = []
92 k = 0
93 for j in range(i, i+halfsize):
94 k_r.append(k)
95 j_r.append(j)
96 # invert if requested
97 if SVSHAPE.invxyz[2]: k_r.reverse()
98 if SVSHAPE.invxyz[2]: j_r.reverse()
99 hz2 = halfsize // 2 # zero stops reversing 1-item lists
100 # if you *really* want to do the in-place swapping manually,
101 # this allows you to do it. good luck...
102 if not inplace_mode:
103 jr = j_r[:hz2]
104 for j in j_r: # loop over 1st order dimension
105 z_end = j == j_r[-1]
106 # now depending on MODE return the index
107 if SVSHAPE.skip in [0b00, 0b10]:
108 result = ri[ji[j]] # lower half
109 elif SVSHAPE.skip in [0b01, 0b11]:
110 result = ri[ji[size-j-1]] # upper half, reverse order
111 loopends = (z_end |
112 ((y_end and z_end)<<1) |
113 ((y_end and x_end and z_end)<<2))
114
115 yield result + SVSHAPE.offset, loopends
116
117 # now in-place swap
118 if inplace_mode:
119 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
120 jlh = jl+halfsize
121 tmp1, tmp2 = ji[jlh], ji[jh]
122 ji[jlh], ji[jh] = tmp2, tmp1
123
124
125 # totally cool *in-place* DCT algorithm
126 def transform2(vec):
127
128 # Initialization
129 n = len(vec)
130 levels = n.bit_length() - 1
131
132 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
133 # TODO: merge these two
134 vec = halfrev2(vec, False)
135 vec = [vec[ri[i]] for i in range(n)]
136
137 # start the inner butterfly
138 size = n
139 while size >= 2:
140 halfsize = size // 2
141 tablestep = n // size
142 ir = list(range(0, n, size))
143 for i in ir:
144 # two lists of half-range indices, e.g. j 0123, jr 7654
145 j = list(range(i, i + halfsize))
146 jr = list(range(i+halfsize, i + size))
147 jr.reverse()
148 for ci, (jl, jh) in enumerate(zip(j, jr)):
149 vec[ri[ji[jl]]] = t1 + t2
150 vec[ri[ji[jh]]] = (t1 - t2) * (1/coeff)
151 hz2 = halfsize // 2 # can be zero which stops reversing 1-item lists
152 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
153 jlh = jl+halfsize
154 tmp1, tmp2 = ji[jlh], ji[jh]
155 ji[jlh], ji[jh] = tmp2, tmp1
156 size //= 2
157
158 def dct_outer_butterfly(SVSHAPE):
159 n = len(vec)
160 size = n // 2
161 while size >= 2:
162 halfsize = size // 2
163 ir = list(range(0, halfsize))
164 print ("itersum", halfsize, size, ir)
165 for i in ir:
166 jr = list(range(i+halfsize, i+n-halfsize, size))
167 print ("itersum jr", i+halfsize, i+size, jr)
168 for jh in jr:
169 vec[jh] += vec[jh+size]
170 print (" itersum", size, i, jh, jh+size)
171 size //= 2
172
173 return vec
174