## REMAP FFT pseudocode

The FFT REMAP is RADIX2 only.

```
# a "yield" version of the REMAP algorithm, for FFT Cooley-Tukey schedules
# original code for the FFT Cooley-Tukey schedule:
demo()
```
## DCT REMAP pseudocode

The DCT REMAP is RADIX2 only. Convolutions may be applied as usual
to create a non-RADIX2 DCT. Combined with appropriate twin-butterfly
instructions, the algorithm below, written in python3, becomes part
of an in-place in-registers Vectorised DCT.

```
# DCT "REMAP" scheduler to create an in-place iterative DCT.
#
# Original fastdctlee.py by Nayuki:
# Copyright (c) 2020 Project Nayuki. (MIT License)
# https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms

from copy import deepcopy
import math

def reverse_bits(val, width):
    """Return an integer whose low *width* bits are the low *width*
    bits of *val* in reverse order (bit 0 becomes bit width-1, etc.).
    """
    out = 0
    for pos in range(width):
        if (val >> pos) & 1:
            out |= 1 << (width - 1 - pos)
    return out

def halfrev2(vec, pre_rev=True):
    """Permute *vec* by the iterative equivalent of recursively-applied
    half-reversal.  This turns out to be Gray-coding: with pre_rev=True
    element i is taken from position gray(i) = i ^ (i >> 1); with
    pre_rev=False the inverse (Gray-decode) permutation is applied.
    """
    if pre_rev:
        return [vec[i ^ (i >> 1)] for i in range(len(vec))]
    out = []
    for idx in range(len(vec)):
        # Gray-decode: fold in every right-shift of idx until exhausted
        decoded = idx
        shift = idx >> 1
        while shift:
            decoded ^= shift
            shift >>= 1
        out.append(vec[decoded])
    return out

def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
    """Yield (element-offset, loop-end-flags) pairs giving the load/store
    order for the DCT/FFT half-swap schedule selected by SVSHAPE.

    SVSHAPE.lims = [n, mode, stride]; the flags word is 0b111 on the
    final element of the schedule and 0b000 otherwise.
    """
    n = SVSHAPE.lims[0]
    mode = SVSHAPE.lims[1]
    stride = SVSHAPE.lims[2]

    # bit-reversal table for the number of radix-2 levels in n
    levels = n.bit_length() - 1
    bitrev = [reverse_bits(idx, levels) for idx in range(n)]

    # indirection table: swap which *indices* are referenced instead of
    # swapping data; pre-arranged so the data ends up in order 0123..
    order = list(range(n))
    # NOTE(review): this branch tests SVSHAPE.mode, the siblings test
    # submode2 -- looks like the FFT-vs-DCT selector; confirm upstream
    if SVSHAPE.mode == 0b01:            # FFT: bit-reversal only
        order = [order[bitrev[idx]] for idx in range(n)]
    elif SVSHAPE.submode2 == 0b001:     # bit-reverse, then half-rev
        order = [order[bitrev[idx]] for idx in range(n)]
        order = halfrev2(order, True)
    else:                               # half-rev, then bit-reverse
        order = halfrev2(order, False)
        order = [order[bitrev[idx]] for idx in range(n)]

    # reversed iteration requested?
    if SVSHAPE.invxyz[0]:
        order.reverse()

    last = order[-1]
    for elem in order:
        yield elem * stride, (0b111 if elem == last else 0b000)

def iterate_dct_inner_costable_indices(SVSHAPE):
    """Yield (index, loop-end-flags) pairs for building the COS table
    used by the DCT inner butterfly.

    SVSHAPE.lims   = [n, mode, stride]: n is the (power-of-2) transform
                     size, stride scales every yielded index.
    SVSHAPE.skip   selects what is yielded: 0b00 the running COS-table
                     offset k, 0b10 the coefficient position ci within
                     the current half-row, 0b11 the current size.
    SVSHAPE.invxyz inverts the iteration order of each dimension.
    SVSHAPE.offset is added to every yielded index.

    The generator wraps for ever; the flags word marks loop-dimension
    ends (bit 0: inner (always set here), bit 1: middle, bit 2: outer).
    """
    n = SVSHAPE.lims[0]
    mode = SVSHAPE.lims[1]    # unused here; kept for symmetry with siblings
    stride = SVSHAPE.lims[2]
    # creating lists of indices to iterate over in each dimension
    # has to be done dynamically, because it depends on the size
    # first, the size-based loop: 2, 4, 8, ... n
    x_r = []
    size = 2
    while size <= n:
        x_r.append(size)
        size *= 2
    # invert order if requested
    if SVSHAPE.invxyz[0]:
        x_r.reverse()

    if len(x_r) == 0:
        return

    # start an infinite (wrapping) loop
    z_end = 1  # only two loop dimensions exist here, so z always "ends"
    k = 0      # running offset into the COS table
    while True:
        for size in x_r: # loop over 3rd order dimension (size)
            x_end = size == x_r[-1]
            # y_r schedule depends on size
            halfsize = size // 2
            y_r = []
            for i in range(0, n, size):
                y_r.append(i)
            # invert if requested
            if SVSHAPE.invxyz[1]: y_r.reverse()
            # half-range indices, e.g. 0123 for size 8
            j = list(range(0, halfsize))
            # invert if requested
            # BUGFIX: was "j_r.reverse()" -- j_r is undefined (NameError
            # whenever invxyz[2] was set); the list in scope is j
            if SVSHAPE.invxyz[2]: j.reverse()
            # loop over 1st order dimension
            for ci, jl in enumerate(j):
                y_end = jl == j[-1]
                # now depending on MODE return the index
                if SVSHAPE.skip == 0b00:
                    result = k        # offset into COS table
                elif SVSHAPE.skip == 0b10:
                    result = ci       # coefficient helper
                elif SVSHAPE.skip == 0b11:
                    result = size     # coefficient helper
                # NOTE(review): skip == 0b01 leaves result unset and
                # would raise NameError -- presumably never used here;
                # confirm against the REMAP spec
                loopends = (z_end |
                            ((y_end and z_end)<<1) |
                            ((y_end and x_end and z_end)<<2))

                yield (result * stride) + SVSHAPE.offset, loopends
                k += 1

def iterate_dct_inner_butterfly_indices(SVSHAPE):
    """Yield (index, loop-end-flags) pairs for the inner butterfly of an
    in-place iterative DCT, driven entirely by SVSHAPE.

    SVSHAPE.lims     = [n, mode, stride]: n is the (power-of-2) size,
                       mode 4 selects the pre-generated COS-table branch,
                       anything else the on-demand ("Vertical-First") one;
                       stride scales every yielded index.
    SVSHAPE.skip     selects the butterfly operand yielded: 0b00 lower
                       half, 0b01 upper half, 0b10/0b11 COS-table offset /
                       coefficient helpers (which pair depends on mode).
    SVSHAPE.submode2 0b01 enables bit-reversed addressing plus
                       halfrev2(.., True); 0b11 (labelled iDCT below)
                       enables halfrev2(.., False) and swaps the
                       indirection order from ri[ji[..]] to ji[ri[..]].
    SVSHAPE.invxyz   inverts iteration order per dimension.
    SVSHAPE.offset   is added to every yielded index.

    The generator wraps for ever; the flags word marks loop-dimension
    ends (bit 0: inner, bit 1: middle, bit 2: outer).
    """
    # get indices to iterate over, in the required order
    n = SVSHAPE.lims[0]
    mode = SVSHAPE.lims[1]
    stride = SVSHAPE.lims[2]
    # creating lists of indices to iterate over in each dimension
    # has to be done dynamically, because it depends on the size
    # first, the size-based loop (which can be done statically):
    # butterfly sizes 2, 4, 8, ... n
    x_r = []
    size = 2
    while size <= n:
        x_r.append(size)
        size *= 2
    # invert order if requested
    if SVSHAPE.invxyz[0]:
        x_r.reverse()

    if len(x_r) == 0:
        return

    # reference (read/write) the in-place data in *reverse-bit-order*
    ri = list(range(n))
    if SVSHAPE.submode2 == 0b01:
        levels = n.bit_length() - 1
        ri = [ri[reverse_bits(i, levels)] for i in range(n)]

    # reference list for not needing to do data-swaps, just swap what
    # *indices* are referenced (two levels of indirection at the moment)
    # pre-reverse the data-swap list so that it *ends up* in the order 0123..
    ji = list(range(n))
    inplace_mode = True
    if inplace_mode and SVSHAPE.submode2 == 0b01:
        ji = halfrev2(ji, True)
    if inplace_mode and SVSHAPE.submode2 == 0b11:
        ji = halfrev2(ji, False)

    # start an infinite (wrapping) loop
    while True:
        k = 0        # running COS-table offset
        k_start = 0  # per-size base for k (advances by halfsize per size)
        for size in x_r: # loop over 3rd order dimension (size)
            x_end = size == x_r[-1]
            # y_r schedule depends on size: start of each size-wide row
            halfsize = size // 2
            y_r = []
            for i in range(0, n, size):
                y_r.append(i)
            # invert if requested
            if SVSHAPE.invxyz[1]: y_r.reverse()
            for i in y_r: # loop over 2nd order dimension
                y_end = i == y_r[-1]
                # two lists of half-range indices, e.g. j 0123, jr 7654
                j = list(range(i, i + halfsize))
                jr = list(range(i+halfsize, i + size))
                jr.reverse()
                # invert if requested
                if SVSHAPE.invxyz[2]:
                    j.reverse()
                    jr.reverse()
                hz2 = halfsize // 2 # zero stops reversing 1-item lists
                # loop over 1st order dimension; k restarts from the
                # per-size base so every row shares one COS-table row
                k = k_start
                for ci, (jl, jh) in enumerate(zip(j, jr)):
                    z_end = jl == j[-1]
                    # now depending on MODE return the index. inner butterfly
                    if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
                        if SVSHAPE.submode2 == 0b11: # iDCT
                            result = ji[ri[jl]]        # lower half
                        else:
                            result = ri[ji[jl]]        # lower half
                    elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
                        if SVSHAPE.submode2 == 0b11: # iDCT
                            result = ji[ri[jl+halfsize]] # upper half
                        else:
                            result = ri[ji[jh]]        # upper half
                    elif mode == 4:
                        # COS table pre-generated mode
                        if SVSHAPE.skip == 0b10: #
                            result = k     # cos table offset
                    else: # mode 2
                        # COS table generated on-demand ("Vertical-First") mode
                        if SVSHAPE.skip == 0b10: #
                            result = ci    # coefficient helper
                        elif SVSHAPE.skip == 0b11: #
                            result = size  # coefficient helper
                    # NOTE(review): some skip/mode combinations (e.g.
                    # skip 0b11 with mode 4) leave result unset -- would
                    # raise NameError; presumed unreachable, confirm
                    loopends = (z_end |
                                ((y_end and z_end)<<1) |
                                ((y_end and x_end and z_end)<<2))

                    yield (result * stride) + SVSHAPE.offset, loopends
                    k += 1

                # now in-place swap: exchange the upper-half indirection
                # entries ji[jl+halfsize] <-> ji[jh] for the first hz2
                # pairs, so later levels see the butterfly's swapped layout
                if inplace_mode:
                    for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
                        jlh = jl+halfsize
                        tmp1, tmp2 = ji[jlh], ji[jh]
                        ji[jlh], ji[jh] = tmp2, tmp1

            # new k_start point for cos tables( runs inside x_r loop NOT i loop)
            k_start += halfsize

# python "yield" can be iterated. use this to make it clear how
# the indices are generated by using natural-looking nested loops
def iterate_dct_outer_butterfly_indices(SVSHAPE):
    """Yield (index, loop-end-flags) pairs for the outer (iterative-sum)
    butterfly of an in-place iterative DCT, driven entirely by SVSHAPE.

    SVSHAPE.lims     = [n, mode, stride]: n is the (power-of-2) size,
                       mode 4 selects the pre-generated COS-table branch;
                       stride scales every yielded index.
    SVSHAPE.skip     selects the operand yielded: 0b00 one data index,
                       0b01 its partner at +size, 0b10 COS-table offset
                       (mode 4) or coefficient position, 0b11 the size.
    SVSHAPE.submode2 0b11 or 0b01 enables bit-reversed addressing; 0b11
                       (labelled iDCT below) additionally applies
                       halfrev2(.., False) and swaps indirection order.
    SVSHAPE.invxyz   inverts iteration order per dimension.
    SVSHAPE.offset   is added to every yielded index.

    The generator wraps for ever; the flags word marks loop-dimension
    ends (bit 0: inner, bit 1: middle, bit 2: outer).
    """
    # get indices to iterate over, in the required order
    n = SVSHAPE.lims[0]
    mode = SVSHAPE.lims[1]
    stride = SVSHAPE.lims[2]
    # creating lists of indices to iterate over in each dimension
    # has to be done dynamically, because it depends on the size
    # first, the size-based loop (which can be done statically):
    # sizes run downwards here, n/2, n/4, ... 2
    x_r = []
    size = n // 2
    while size >= 2:
        x_r.append(size)
        size //= 2
    # invert order if requested
    if SVSHAPE.invxyz[0]:
        x_r.reverse()

    if len(x_r) == 0:
        return

    # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
    ri = list(range(n))
    if SVSHAPE.submode2 in [0b11, 0b01]:
        levels = n.bit_length() - 1
        ri = [ri[reverse_bits(i, levels)] for i in range(n)]

    # reference list for not needing to do data-swaps, just swap what
    # *indices* are referenced (two levels of indirection at the moment)
    # pre-reverse the data-swap list so that it *ends up* in the order 0123..
    ji = list(range(n))
    inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
    if SVSHAPE.submode2 == 0b11:
        ji = halfrev2(ji, False)

    # start an infinite (wrapping) loop
    while True:
        k = 0        # running COS-table offset
        k_start = 0  # per-size base for k (advances by halfsize per size)
        for size in x_r: # loop over 3rd order dimension (size)
            halfsize = size//2
            x_end = size == x_r[-1]
            y_r = list(range(0, halfsize))
            # invert if requested
            if SVSHAPE.invxyz[1]: y_r.reverse()
            for i in y_r: # loop over 2nd order dimension
                y_end = i == y_r[-1]
                # one list to create iterative-sum schedule
                jr = list(range(i+halfsize, i+n-halfsize, size))
                # invert if requested
                if SVSHAPE.invxyz[2]: jr.reverse()
                hz2 = halfsize // 2 # zero stops reversing 1-item lists
                k = k_start
                for ci, jh in enumerate(jr): # loop over 1st order dimension
                    z_end = jh == jr[-1]
                    if mode == 4:
                        # COS table pre-generated mode
                        if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
                            if SVSHAPE.submode2 == 0b11: # iDCT
                                result = ji[ri[jh]]        # upper half
                            else:
                                result = ri[ji[jh]]        # lower half
                        elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
                            if SVSHAPE.submode2 == 0b11: # iDCT
                                result = ji[ri[jh+size]]   # upper half
                            else:
                                result = ri[ji[jh+size]]   # upper half
                        elif SVSHAPE.skip == 0b10: #
                            result = k # cos table offset
                    else:
                        # COS table generated on-demand ("Vertical-First") mode
                        if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
                            if SVSHAPE.submode2 == 0b11: # iDCT
                                result = ji[ri[jh]]        # lower half
                            else:
                                result = ri[ji[jh]]        # lower half
                        elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
                            if SVSHAPE.submode2 == 0b11: # iDCT
                                result = ji[ri[jh+size]]   # upper half
                            else:
                                result = ri[ji[jh+size]]   # upper half
                        elif SVSHAPE.skip == 0b10: #
                            result = ci # coefficient helper
                        elif SVSHAPE.skip == 0b11: #
                            result = size # coefficient helper
                    # NOTE(review): skip 0b11 with mode 4 leaves result
                    # unset -- would raise NameError; presumed
                    # unreachable, confirm against the REMAP spec
                    loopends = (z_end |
                                ((y_end and z_end)<<1) |
                                ((y_end and x_end and z_end)<<2))

                    yield (result * stride) + SVSHAPE.offset, loopends
                    k += 1

            # new k_start point for cos tables( runs inside x_r loop NOT i loop)
            k_start += halfsize

```
[[!tag opf_rfc]]