1 # DCT "REMAP" scheduler
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
14 # bits of the integer 'val'.
15 def reverse_bits(val
, width
):
17 for _
in range(width
):
18 result
= (result
<< 1) |
(val
& 1)
23 # iterative version of [recursively-applied] half-rev.
24 # relies on the list lengths being power-of-two and the fact
25 # that bit-inversion of a list of binary numbers is the same
26 # as reversing the order of the list
27 # this version is dead easy to implement in hardware.
28 # a big surprise is that the half-reversal can be done with
29 # such a simple XOR. the inverse operation is slightly trickier
30 def halfrev2(vec
, pre_rev
=True):
32 for i
in range(len(vec
)):
34 res
.append(i ^
(i
>>1))
38 for ji
in range(1, bl
):
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_butterfly_indices(SVSHAPE
):
48 # get indices to iterate over, in the required order
50 # createing lists of indices to iterate over in each dimension
51 # has to be done dynamically, because it depends on the size
52 # first, the size-based loop (which can be done statically)
58 # invert order if requested
59 if SVSHAPE
.invxyz
[0]: x_r
.reverse()
64 # reference (read/write) the in-place data in *reverse-bit-order*
66 ri
= [ri
[reverse_bits(i
, levels
)] for i
in range(n
)]
68 # reference list for not needing to do data-swaps, just swap what
69 # *indices* are referenced (two levels of indirection at the moment)
70 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
72 inplace_mode
= SVSHAPE
.skip
not in [0b10, 0b11]
74 ji
= halfrev2(ji
, True)
76 # start an infinite (wrapping) loop
79 for size
in x_r
: # loop over 3rd order dimension (size)
80 x_end
= size
== x_r
[-1]
81 # y_r schedule depends on size
84 for i
in range(0, n
, size
):
87 if SVSHAPE
.invxyz
[1]: y_r
.reverse()
88 for i
in y_r
: # loop over 2nd order dimension
93 for j
in range(i
, i
+halfsize
):
97 if SVSHAPE
.invxyz
[2]: k_r
.reverse()
98 if SVSHAPE
.invxyz
[2]: j_r
.reverse()
99 hz2
= halfsize
// 2 # zero stops reversing 1-item lists
100 # if you *really* want to do the in-place swapping manually,
101 # this allows you to do it. good luck...
104 for j
in j_r
: # loop over 1st order dimension
106 # now depending on MODE return the index
107 if SVSHAPE
.skip
in [0b00, 0b10]:
108 result
= ri
[ji
[j
]] # lower half
109 elif SVSHAPE
.skip
in [0b01, 0b11]:
110 result
= ri
[ji
[size
-j
-1]] # upper half, reverse order
112 ((y_end
and z_end
)<<1) |
113 ((y_end
and x_end
and z_end
)<<2))
115 yield result
+ SVSHAPE
.offset
, loopends
119 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
121 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
122 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
125 # totally cool *in-place* DCT algorithm
130 levels
= n
.bit_length() - 1
132 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
133 # TODO: merge these two
134 vec
= halfrev2(vec
, False)
135 vec
= [vec
[ri
[i
]] for i
in range(n
)]
137 # start the inner butterfly
141 tablestep
= n
// size
142 ir
= list(range(0, n
, size
))
144 # two lists of half-range indices, e.g. j 0123, jr 7654
145 j
= list(range(i
, i
+ halfsize
))
146 jr
= list(range(i
+halfsize
, i
+ size
))
148 for ci
, (jl
, jh
) in enumerate(zip(j
, jr
)):
149 vec
[ri
[ji
[jl
]]] = t1
+ t2
150 vec
[ri
[ji
[jh
]]] = (t1
- t2
) * (1/coeff
)
151 hz2
= halfsize
// 2 # can be zero which stops reversing 1-item lists
152 for ci
, (jl
, jh
) in enumerate(zip(j
[:hz2
], jr
[:hz2
])):
154 tmp1
, tmp2
= ji
[jlh
], ji
[jh
]
155 ji
[jlh
], ji
[jh
] = tmp2
, tmp1
158 def dct_outer_butterfly(SVSHAPE
):
163 ir
= list(range(0, halfsize
))
164 print ("itersum", halfsize
, size
, ir
)
166 jr
= list(range(i
+halfsize
, i
+n
-halfsize
, size
))
167 print ("itersum jr", i
+halfsize
, i
+size
, jr
)
169 vec
[jh
] += vec
[jh
+size
]
170 print (" itersum", size
, i
, jh
, jh
+size
)