add LD-half-swap for i-DCT which does not work. redesign needed
[openpower-isa.git] / src / openpower / decoder / isa / remap_dct_yield.py
1 # DCT "REMAP" scheduler
2 #
3 # Modifications made to create an in-place iterative DCT:
4 # Copyright (c) 2021 Luke Kenneth Casson Leighton <lkcl@lkcl.net>
5 #
6 # SPDX: LGPLv3+
7 #
8 # Original fastdctlee.py by Nayuki:
9 # Copyright (c) 2020 Project Nayuki. (MIT License)
10 # https://www.nayuki.io/page/fast-discrete-cosine-transform-algorithms
11
12 from copy import deepcopy
13 import math
14
15 # bits of the integer 'val'.
16 def reverse_bits(val, width):
17 result = 0
18 for _ in range(width):
19 result = (result << 1) | (val & 1)
20 val >>= 1
21 return result
22
23
24 # iterative version of [recursively-applied] half-rev.
25 # relies on the list lengths being power-of-two and the fact
26 # that bit-inversion of a list of binary numbers is the same
27 # as reversing the order of the list
28 # this version is dead easy to implement in hardware.
29 # a big surprise is that the half-reversal can be done with
30 # such a simple XOR. the inverse operation is slightly trickier
31 def halfrev2(vec, pre_rev=True):
32 res = []
33 for i in range(len(vec)):
34 if pre_rev:
35 res.append(vec[i ^ (i>>1)])
36 else:
37 ri = i
38 bl = i.bit_length()
39 for ji in range(1, bl):
40 ri ^= (i >> ji)
41 res.append(vec[ri])
42 return res
43
44
45 # python "yield" can be iterated. use this to make it clear how
46 # the indices are generated by using natural-looking nested loops
47 def iterate_dct_inner_halfswap_loadstore(SVSHAPE):
48 # get indices to iterate over, in the required order
49 n = SVSHAPE.lims[0]
50 mode = SVSHAPE.lims[1]
51 print ("inner halfswap loadstore", n, mode, SVSHAPE.skip,
52 "submode", SVSHAPE.submode2)
53
54 # reference list for not needing to do data-swaps, just swap what
55 # *indices* are referenced (two levels of indirection at the moment)
56 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
57 ji = list(range(n))
58
59 levels = n.bit_length() - 1
60 if SVSHAPE.submode2 == 0b001:
61 ji = halfrev2(ji, True)
62 else:
63 ji = halfrev2(ji, False)
64
65 if False: # swap: TODO, add extra bit-reverse mode
66 ri = [reverse_bits(i, levels) for i in range(n)]
67 ji = [ji[ri[i]] for i in range(n)]
68
69 # invert order if requested
70 if SVSHAPE.invxyz[0]:
71 ji.reverse()
72
73 for i, jl in enumerate(ji):
74 y_end = jl == ji[-1]
75 yield jl, (0b111 if y_end else 0b000)
76
77
78 # python "yield" can be iterated. use this to make it clear how
79 # the indices are generated by using natural-looking nested loops
80 def iterate_dct_inner_costable_indices(SVSHAPE):
81 # get indices to iterate over, in the required order
82 n = SVSHAPE.lims[0]
83 mode = SVSHAPE.lims[1]
84 print ("inner costable", mode)
85 # creating lists of indices to iterate over in each dimension
86 # has to be done dynamically, because it depends on the size
87 # first, the size-based loop (which can be done statically)
88 x_r = []
89 size = 2
90 while size <= n:
91 x_r.append(size)
92 size *= 2
93 # invert order if requested
94 if SVSHAPE.invxyz[0]:
95 x_r.reverse()
96
97 if len(x_r) == 0:
98 return
99
100 #print ("ri", ri)
101 #print ("ji", ji)
102
103 # start an infinite (wrapping) loop
104 skip = 0
105 z_end = 1 # doesn't exist in this, only 2 loops
106 k = 0
107 while True:
108 for size in x_r: # loop over 3rd order dimension (size)
109 x_end = size == x_r[-1]
110 # y_r schedule depends on size
111 halfsize = size // 2
112 y_r = []
113 for i in range(0, n, size):
114 y_r.append(i)
115 # invert if requested
116 if SVSHAPE.invxyz[1]: y_r.reverse()
117 # two lists of half-range indices, e.g. j 0123, jr 7654
118 j = list(range(0, halfsize))
119 # invert if requested
120 if SVSHAPE.invxyz[2]: j_r.reverse()
121 #print ("xform jr", jr)
122 # loop over 1st order dimension
123 for ci, jl in enumerate(j):
124 y_end = jl == j[-1]
125 # now depending on MODE return the index. inner butterfly
126 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
127 result = k # offset into COS table
128 elif SVSHAPE.skip == 0b10: #
129 result = ci # coefficient helper
130 elif SVSHAPE.skip == 0b11: #
131 result = size # coefficient helper
132 loopends = (z_end |
133 ((y_end and z_end)<<1) |
134 ((y_end and x_end and z_end)<<2))
135
136 yield result + SVSHAPE.offset, loopends
137 k += 1
138
139 # python "yield" can be iterated. use this to make it clear how
140 # the indices are generated by using natural-looking nested loops
141 def iterate_dct_inner_butterfly_indices(SVSHAPE):
142 # get indices to iterate over, in the required order
143 n = SVSHAPE.lims[0]
144 mode = SVSHAPE.lims[1]
145 print ("inner butterfly", mode, SVSHAPE.skip, "submode", SVSHAPE.submode2)
146 # creating lists of indices to iterate over in each dimension
147 # has to be done dynamically, because it depends on the size
148 # first, the size-based loop (which can be done statically)
149 x_r = []
150 size = 2
151 while size <= n:
152 x_r.append(size)
153 size *= 2
154 # invert order if requested
155 if SVSHAPE.invxyz[0]:
156 x_r.reverse()
157
158 if len(x_r) == 0:
159 return
160
161 # reference (read/write) the in-place data in *reverse-bit-order*
162 ri = list(range(n))
163 if SVSHAPE.submode2 == 0b01:
164 levels = n.bit_length() - 1
165 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
166
167 # reference list for not needing to do data-swaps, just swap what
168 # *indices* are referenced (two levels of indirection at the moment)
169 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
170 ji = list(range(n))
171 inplace_mode = True
172 if inplace_mode and SVSHAPE.submode2 == 0b01:
173 #print ("inplace mode")
174 ji = halfrev2(ji, True)
175 if inplace_mode and SVSHAPE.submode2 == 0b11:
176 ji = halfrev2(ji, False)
177
178 print ("ri", ri)
179 print ("ji", ji)
180
181 # start an infinite (wrapping) loop
182 while True:
183 k = 0
184 k_start = 0
185 for size in x_r: # loop over 3rd order dimension (size)
186 x_end = size == x_r[-1]
187 # y_r schedule depends on size
188 halfsize = size // 2
189 y_r = []
190 for i in range(0, n, size):
191 y_r.append(i)
192 # invert if requested
193 if SVSHAPE.invxyz[1]: y_r.reverse()
194 for i in y_r: # loop over 2nd order dimension
195 y_end = i == y_r[-1]
196 # two lists of half-range indices, e.g. j 0123, jr 7654
197 j = list(range(i, i + halfsize))
198 jr = list(range(i+halfsize, i + size))
199 jr.reverse()
200 # invert if requested
201 if SVSHAPE.invxyz[2]:
202 j.reverse()
203 jr.reverse()
204 hz2 = halfsize // 2 # zero stops reversing 1-item lists
205 #print ("xform jr", jr)
206 # loop over 1st order dimension
207 k = k_start
208 for ci, (jl, jh) in enumerate(zip(j, jr)):
209 z_end = jl == j[-1]
210 # now depending on MODE return the index. inner butterfly
211 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
212 if SVSHAPE.submode2 == 0b11: # iDCT
213 result = ji[ri[jl]] # lower half
214 else:
215 result = ri[ji[jl]] # lower half
216 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
217 if SVSHAPE.submode2 == 0b11: # iDCT
218 result = ji[ri[jl+halfsize]] # upper half
219 else:
220 result = ri[ji[jh]] # upper half
221 elif mode == 4:
222 # COS table pre-generated mode
223 if SVSHAPE.skip == 0b10: #
224 result = k # cos table offset
225 else: # mode 2
226 # COS table generated on-demand ("Vertical-First") mode
227 if SVSHAPE.skip == 0b10: #
228 result = ci # coefficient helper
229 elif SVSHAPE.skip == 0b11: #
230 result = size # coefficient helper
231 loopends = (z_end |
232 ((y_end and z_end)<<1) |
233 ((y_end and x_end and z_end)<<2))
234
235 yield result + SVSHAPE.offset, loopends
236 k += 1
237
238 # now in-place swap
239 if inplace_mode:
240 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
241 jlh = jl+halfsize
242 print ("inplace swap", jh, jlh)
243 tmp1, tmp2 = ji[jlh], ji[jh]
244 ji[jlh], ji[jh] = tmp2, tmp1
245
246 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
247 k_start += halfsize
248
249
250 # python "yield" can be iterated. use this to make it clear how
251 # the indices are generated by using natural-looking nested loops
252 def iterate_dct_outer_butterfly_indices(SVSHAPE):
253 # get indices to iterate over, in the required order
254 n = SVSHAPE.lims[0]
255 mode = SVSHAPE.lims[1]
256 # creating lists of indices to iterate over in each dimension
257 # has to be done dynamically, because it depends on the size
258 # first, the size-based loop (which can be done statically)
259 x_r = []
260 size = n // 2
261 while size >= 2:
262 x_r.append(size)
263 size //= 2
264 # invert order if requested
265 if SVSHAPE.invxyz[0]:
266 x_r.reverse()
267
268 if len(x_r) == 0:
269 return
270
271 print ("outer butterfly", mode, SVSHAPE.skip, "submode", SVSHAPE.submode2)
272
273 # I-DCT, reference (read/write) the in-place data in *reverse-bit-order*
274 ri = list(range(n))
275 if SVSHAPE.submode2 in [0b11, 0b01]:
276 levels = n.bit_length() - 1
277 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
278
279 # reference list for not needing to do data-swaps, just swap what
280 # *indices* are referenced (two levels of indirection at the moment)
281 # pre-reverse the data-swap list so that it *ends up* in the order 0123..
282 ji = list(range(n))
283 inplace_mode = False # need the space... SVSHAPE.skip in [0b10, 0b11]
284 if SVSHAPE.submode2 == 0b11:
285 ji = halfrev2(ji, False)
286
287 print ("ri", ri)
288 print ("ji", ji)
289
290 # start an infinite (wrapping) loop
291 while True:
292 k = 0
293 k_start = 0
294 for size in x_r: # loop over 3rd order dimension (size)
295 halfsize = size//2
296 x_end = size == x_r[-1]
297 y_r = list(range(0, halfsize))
298 print ("itersum", halfsize, size, y_r, "invert", SVSHAPE.invxyz[1])
299 # invert if requested
300 if SVSHAPE.invxyz[1]: y_r.reverse()
301 for i in y_r: # loop over 2nd order dimension
302 y_end = i == y_r[-1]
303 # one list to create iterative-sum schedule
304 jr = list(range(i+halfsize, i+n-halfsize, size))
305 # invert if requested
306 if SVSHAPE.invxyz[2]: jr.reverse()
307 print ("itersum jr", i+halfsize, i+size, jr,
308 "invert", SVSHAPE.invxyz[2])
309 hz2 = halfsize // 2 # zero stops reversing 1-item lists
310 k = k_start
311 for ci, jh in enumerate(jr): # loop over 1st order dimension
312 z_end = jh == jr[-1]
313 #print (" itersum", size, i, jh, jh+size)
314 if mode == 4:
315 # COS table pre-generated mode
316 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
317 if SVSHAPE.submode2 == 0b11: # iDCT
318 result = ji[ri[jh]] # upper half
319 else:
320 result = ri[ji[jh]] # lower half
321 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
322 if SVSHAPE.submode2 == 0b11: # iDCT
323 result = ji[ri[jh+size]] # upper half
324 else:
325 result = ri[ji[jh+size]] # upper half
326 elif SVSHAPE.skip == 0b10: #
327 result = k # cos table offset
328 else:
329 # COS table generated on-demand ("Vertical-First") mode
330 if SVSHAPE.skip == 0b00: # in [0b00, 0b10]:
331 if SVSHAPE.submode2 == 0b11: # iDCT
332 result = ji[ri[jh]] # lower half
333 else:
334 result = ri[ji[jh]] # lower half
335 elif SVSHAPE.skip == 0b01: # in [0b01, 0b11]:
336 if SVSHAPE.submode2 == 0b11: # iDCT
337 result = ji[ri[jh+size]] # upper half
338 else:
339 result = ri[ji[jh+size]] # upper half
340 elif SVSHAPE.skip == 0b10: #
341 result = ci # coefficient helper
342 elif SVSHAPE.skip == 0b11: #
343 result = size # coefficient helper
344 loopends = (z_end |
345 ((y_end and z_end)<<1) |
346 ((y_end and x_end and z_end)<<2))
347
348 yield result + SVSHAPE.offset, loopends
349 k += 1
350
351 # now in-place swap (disabled)
352 if False and SVSHAPE.submode2 == 0b11:
353 j = list(range(i, i + halfsize))
354 jr = list(range(i+halfsize, i + size))
355 jr.reverse()
356 for ci, (jl, jh) in enumerate(zip(j[:hz2], jr[:hz2])):
357 jlh = jl+halfsize
358 tmp1, tmp2 = ji[jlh], ji[jh]
359 print ("inplace swap", jh, jlh, "actual", tmp1, tmp2)
360 ji[jlh], ji[jh] = tmp2, tmp1
361
362 # new k_start point for cos tables( runs inside x_r loop NOT i loop)
363 k_start += halfsize
364
365
366 def pprint_schedule(schedule, n):
367 size = 2
368 idx = 0
369 while size <= n:
370 halfsize = size // 2
371 tablestep = n // size
372 print ("size %d halfsize %d tablestep %d" % \
373 (size, halfsize, tablestep))
374 for i in range(0, n, size):
375 prefix = "i %d\t" % i
376 for j in range(i, i + halfsize):
377 (jl, je), (jh, he) = schedule[idx]
378 print (" %-3d\t%s j=%-2d jh=%-2d "
379 "j[jl=%-2d] j[jh=%-2d]" % \
380 (idx, prefix, j, j+halfsize,
381 jl, jh,
382 ),
383 "end", bin(je)[2:], bin(je)[2:])
384 idx += 1
385 size *= 2
386
387 def pprint_schedule_outer(schedule, n):
388 size = 2
389 idx = 0
390 while size <= n//2:
391 halfsize = size // 2
392 tablestep = n // size
393 print ("size %d halfsize %d tablestep %d" % \
394 (size, halfsize, tablestep))
395 y_r = list(range(0, halfsize))
396 for i in y_r:
397 prefix = "i %d\t" % i
398 jr = list(range(i+halfsize, i+n-halfsize, size))
399 for j in jr:
400 (jl, je), (jh, he) = schedule[idx]
401 print (" %-3d\t%s j=%-2d jh=%-2d "
402 "j[jl=%-2d] j[jh=%-2d]" % \
403 (idx, prefix, j, j+halfsize,
404 jl, jh,
405 ),
406 "end", bin(je)[2:], bin(je)[2:])
407 idx += 1
408 size *= 2
409
410
411 # totally cool *in-place* inverse DCT algorithm using yield REMAPs
412 def inverse_transform2(vec):
413
414 vec = deepcopy(vec)
415
416 # Initialization
417 n = len(vec)
418 print ()
419 print ("inverse_transform2", n)
420 levels = n.bit_length() - 1
421
422 # set up dims
423 xdim = n
424
425 # divide element 0 by 2
426 vec[0] /= 2.0
427
428 # reference (read/write) the in-place data in *reverse-bit-order*
429 ri = list(range(n))
430 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
431
432 # pretend we LDed data in half-swapped *and* bit-reversed order as well
433 # TODO: merge these two
434 vec = [vec[ri[i]] for i in range(n)]
435 vec = halfrev2(vec, True)
436
437 print("inverse_transform2 post-alter", vec)
438
439 # create a cos table: not strictly necessary but here for illustrative
440 # purposes, to demonstrate the point that it really *is* iterative.
441 # this table could be cached and used multiple times rather than
442 # computed every time.
443 ctable = []
444 size = 2
445 while size <= n:
446 halfsize = size // 2
447 for ci in range(halfsize):
448 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
449 ctable.append(coeff)
450 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
451 "i/n", (ci+0.5)/size, coeff)
452 size *= 2
453
454 # set up an SVSHAPE
455 class SVSHAPE:
456 pass
457
458 # XXX TODO
459 if False:
460 # ci schedule
461 SVSHAPE0 = SVSHAPE()
462 SVSHAPE0.lims = [xdim, 4, 0]
463 SVSHAPE0.mode = 0b01
464 SVSHAPE0.submode2 = 0b01
465 SVSHAPE0.skip = 0b10
466 SVSHAPE0.offset = 0 # experiment with different offset, here
467 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
468 # size schedule
469 SVSHAPE1 = SVSHAPE()
470 SVSHAPE1.lims = [xdim, 4, 0]
471 SVSHAPE1.mode = 0b01
472 SVSHAPE1.submode2 = 0b01
473 SVSHAPE1.skip = 0b11
474 SVSHAPE1.offset = 0 # experiment with different offset, here
475 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
476 # k schedule
477 SVSHAPE2 = SVSHAPE()
478 SVSHAPE2.lims = [xdim, 4, 0]
479 SVSHAPE2.mode = 0b01
480 SVSHAPE2.submode2 = 0b01
481 SVSHAPE2.skip = 0b00
482 SVSHAPE2.offset = 0 # experiment with different offset, here
483 SVSHAPE2.invxyz = [1,0,1] # inversion if desired
484
485 # enumerate over the iterator function, getting new indices
486 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
487 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
488 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
489 for ((ci, cie), (size, sze), (k, ke)) in \
490 zip(i0, i1, i2):
491 print ("xform2 cos", ci, size, k)
492 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
493 assert coeff == ctable[k]
494 print ("coeff", size, "ci", ci, "k", k,
495 "i/n", (ci+0.5)/size, coeff,
496 "end", bin(cie), bin(sze), bin(ke))
497 if cie == 0b111: # all loops end
498 break
499
500 # now things are in the right order for the outer butterfly.
501
502 # j schedule
503 SVSHAPE0 = SVSHAPE()
504 SVSHAPE0.lims = [xdim, 0b0000010, 0]
505 SVSHAPE0.submode2 = 0b11
506 SVSHAPE0.mode = 0b11
507 SVSHAPE0.skip = 0b00
508 SVSHAPE0.offset = 0 # experiment with different offset, here
509 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
510 # j+halfstep schedule
511 SVSHAPE1 = SVSHAPE()
512 SVSHAPE1.lims = [xdim, 0b0000010, 0]
513 SVSHAPE1.mode = 0b11
514 SVSHAPE1.submode2 = 0b11
515 SVSHAPE1.skip = 0b01
516 SVSHAPE1.offset = 0 # experiment with different offset, here
517 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
518
519 # enumerate over the iterator function, getting new indices
520 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
521 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
522 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
523 print ("itersum jr", jl, jh,
524 "end", bin(jle), bin(jhe),
525 vec[jl], vec[jh], vec[jh]+vec[jl])
526 vec[jh] += vec[jl]
527 size //= 2
528 if jle == 0b111: # all loops end
529 break
530
531 print("transform2 pre-itersum", vec)
532
533 ################
534 # INNER butterfly
535 ################
536
537 # j schedule
538 SVSHAPE0 = SVSHAPE()
539 SVSHAPE0.lims = [xdim, 0b000001, 0]
540 SVSHAPE0.mode = 0b11
541 SVSHAPE0.submode2 = 0b11
542 SVSHAPE0.skip = 0b00
543 SVSHAPE0.offset = 0 # experiment with different offset, here
544 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
545 # j+halfstep schedule
546 SVSHAPE1 = SVSHAPE()
547 SVSHAPE1.lims = [xdim, 0b000001, 0]
548 SVSHAPE1.mode = 0b11
549 SVSHAPE1.submode2 = 0b11
550 SVSHAPE1.skip = 0b01
551 SVSHAPE1.offset = 0 # experiment with different offset, here
552 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
553 # ci schedule
554 SVSHAPE2 = SVSHAPE()
555 SVSHAPE2.lims = [xdim, 0b000001, 0]
556 SVSHAPE2.mode = 0b11
557 SVSHAPE2.submode2 = 0b11
558 SVSHAPE2.skip = 0b10
559 SVSHAPE2.offset = 0 # experiment with different offset, here
560 SVSHAPE2.invxyz = [0,0,0] # inversion if desired
561 # size schedule
562 SVSHAPE3 = SVSHAPE()
563 SVSHAPE3.lims = [xdim, 0b000001, 0]
564 SVSHAPE3.mode = 0b11
565 SVSHAPE3.submode2 = 0b11
566 SVSHAPE3.skip = 0b11
567 SVSHAPE3.offset = 0 # experiment with different offset, here
568 SVSHAPE3.invxyz = [0,0,0] # inversion if desired
569
570 # enumerate over the iterator function, getting new indices
571 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
572 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
573 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
574 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
575 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
576 enumerate(zip(i0, i1, i2, i3)):
577 t1, t2 = vec[jl], vec[jh]
578 print ("xform2", jl, jh, ci, size)
579 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
580 #assert coeff == ctable[k]
581 vec[jl] = t1 + t2/coeff
582 vec[jh] = t1 - t2/coeff
583 print ("coeff", size, "ci", ci,
584 "jl", jl, "jh", jh,
585 "i/n", (ci+0.5)/size, coeff, "t1/2", t1, t2,
586 "+/-", vec[jl], vec[jh],
587 "end", bin(jle), bin(jhe))
588 if jle == 0b111: # all loops end
589 break
590
591 print("inverse_transform2 result", vec)
592
593 return vec
594
595
596 # totally cool *in-place* DCT algorithm using yield REMAPs
597 def transform2(vec):
598
599 # Initialization
600 n = len(vec)
601 print ()
602 print ("transform2", n)
603 levels = n.bit_length() - 1
604
605 # set up dims
606 xdim = n
607
608 # reference (read/write) the in-place data in *reverse-bit-order*
609 ri = list(range(n))
610 ri = [ri[reverse_bits(i, levels)] for i in range(n)]
611
612 # and pretend we LDed data in half-swapped *and* bit-reversed order as well
613 # TODO: merge these two
614 vec = halfrev2(vec, False)
615 vec = [vec[ri[i]] for i in range(n)]
616
617 # create a cos table: not strictly necessary but here for illustrative
618 # purposes, to demonstrate the point that it really *is* iterative.
619 # this table could be cached and used multiple times rather than
620 # computed every time.
621 ctable = []
622 size = n
623 while size >= 2:
624 halfsize = size // 2
625 for ci in range(halfsize):
626 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
627 ctable.append(coeff)
628 print ("coeff", size, "ci", ci, "k", len(ctable)-1,
629 "i/n", (ci+0.5)/size, coeff)
630 size //= 2
631
632 # set up an SVSHAPE
633 class SVSHAPE:
634 pass
635 # ci schedule
636 SVSHAPE0 = SVSHAPE()
637 SVSHAPE0.lims = [xdim, 4, 0]
638 SVSHAPE0.mode = 0b01
639 SVSHAPE0.submode2 = 0b01
640 SVSHAPE0.skip = 0b10
641 SVSHAPE0.offset = 0 # experiment with different offset, here
642 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
643 # size schedule
644 SVSHAPE1 = SVSHAPE()
645 SVSHAPE1.lims = [xdim, 4, 0]
646 SVSHAPE1.mode = 0b01
647 SVSHAPE1.submode2 = 0b01
648 SVSHAPE1.skip = 0b11
649 SVSHAPE1.offset = 0 # experiment with different offset, here
650 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
651 # k schedule
652 SVSHAPE2 = SVSHAPE()
653 SVSHAPE2.lims = [xdim, 4, 0]
654 SVSHAPE2.mode = 0b01
655 SVSHAPE2.submode2 = 0b01
656 SVSHAPE2.skip = 0b00
657 SVSHAPE2.offset = 0 # experiment with different offset, here
658 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
659
660 # enumerate over the iterator function, getting new indices
661 i0 = iterate_dct_inner_costable_indices(SVSHAPE0)
662 i1 = iterate_dct_inner_costable_indices(SVSHAPE1)
663 i2 = iterate_dct_inner_costable_indices(SVSHAPE2)
664 for ((ci, cie), (size, sze), (k, ke)) in \
665 zip(i0, i1, i2):
666 print ("xform2 cos", ci, size, k)
667 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
668 assert coeff == ctable[k]
669 print ("coeff", size, "ci", ci, "k", k,
670 "i/n", (ci+0.5)/size, coeff,
671 "end", bin(cie), bin(sze), bin(ke))
672 if cie == 0b111: # all loops end
673 break
674
675 ################
676 # INNER butterfly
677 ################
678
679 # j schedule
680 SVSHAPE0 = SVSHAPE()
681 SVSHAPE0.lims = [xdim, 0b000001, 0]
682 SVSHAPE0.mode = 0b01
683 SVSHAPE0.submode2 = 0b01
684 SVSHAPE0.skip = 0b00
685 SVSHAPE0.offset = 0 # experiment with different offset, here
686 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
687 # j+halfstep schedule
688 SVSHAPE1 = SVSHAPE()
689 SVSHAPE1.lims = [xdim, 0b000001, 0]
690 SVSHAPE1.mode = 0b01
691 SVSHAPE1.submode2 = 0b01
692 SVSHAPE1.skip = 0b01
693 SVSHAPE1.offset = 0 # experiment with different offset, here
694 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
695 # ci schedule
696 SVSHAPE2 = SVSHAPE()
697 SVSHAPE2.lims = [xdim, 0b000001, 0]
698 SVSHAPE2.mode = 0b01
699 SVSHAPE2.submode2 = 0b01
700 SVSHAPE2.skip = 0b10
701 SVSHAPE2.offset = 0 # experiment with different offset, here
702 SVSHAPE2.invxyz = [1,0,0] # inversion if desired
703 # size schedule
704 SVSHAPE3 = SVSHAPE()
705 SVSHAPE3.lims = [xdim, 0b000001, 0]
706 SVSHAPE3.mode = 0b01
707 SVSHAPE3.submode2 = 0b01
708 SVSHAPE3.skip = 0b11
709 SVSHAPE3.offset = 0 # experiment with different offset, here
710 SVSHAPE3.invxyz = [1,0,0] # inversion if desired
711
712 # enumerate over the iterator function, getting new indices
713 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
714 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
715 i2 = iterate_dct_inner_butterfly_indices(SVSHAPE2)
716 i3 = iterate_dct_inner_butterfly_indices(SVSHAPE3)
717 for k, ((jl, jle), (jh, jhe), (ci, cie), (size, sze)) in \
718 enumerate(zip(i0, i1, i2, i3)):
719 t1, t2 = vec[jl], vec[jh]
720 print ("xform2", jl, jh, ci, size)
721 coeff = (math.cos((ci + 0.5) * math.pi / size) * 2.0)
722 #assert coeff == ctable[k]
723 vec[jl] = t1 + t2
724 vec[jh] = (t1 - t2) * (1/coeff)
725 print ("coeff", size, "ci", ci,
726 "jl", jl, "jh", jh,
727 "i/n", (ci+0.5)/size, coeff, vec[jl],
728 vec[jh],
729 "end", bin(jle), bin(jhe))
730 if jle == 0b111: # all loops end
731 break
732
733 print("transform2 pre-itersum", vec)
734
735 # now things are in the right order for the outer butterfly.
736
737 # j schedule
738 SVSHAPE0 = SVSHAPE()
739 SVSHAPE0.lims = [xdim, 0b0000010, 0]
740 SVSHAPE0.submode2 = 0b100
741 SVSHAPE0.mode = 0b01
742 SVSHAPE0.skip = 0b00
743 SVSHAPE0.offset = 0 # experiment with different offset, here
744 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
745 # j+halfstep schedule
746 SVSHAPE1 = SVSHAPE()
747 SVSHAPE1.lims = [xdim, 0b0000010, 0]
748 SVSHAPE1.mode = 0b01
749 SVSHAPE1.submode2 = 0b100
750 SVSHAPE1.skip = 0b01
751 SVSHAPE1.offset = 0 # experiment with different offset, here
752 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
753
754 # enumerate over the iterator function, getting new indices
755 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
756 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
757 for k, ((jl, jle), (jh, jhe)) in enumerate(zip(i0, i1)):
758 print ("itersum jr", jl, jh,
759 "end", bin(jle), bin(jhe))
760 vec[jl] += vec[jh]
761 size //= 2
762 if jle == 0b111: # all loops end
763 break
764
765 print("transform2 result", vec)
766
767 return vec
768
769
770 def demo_idct():
771 # set the dimension sizes here
772 n = 8
773 xdim = n
774 ydim = 0 # not needed
775 zdim = 0 # again, not needed
776
777 # set up an SVSHAPE
778 class SVSHAPE:
779 pass
780
781 ################
782 # outer butterfly
783 ################
784
785 # j schedule
786 SVSHAPE0 = SVSHAPE()
787 SVSHAPE0.lims = [xdim, 0b0000010, 0]
788 SVSHAPE0.submode2 = 0b11
789 SVSHAPE0.mode = 0b11
790 SVSHAPE0.skip = 0b00
791 SVSHAPE0.offset = 0 # experiment with different offset, here
792 SVSHAPE0.invxyz = [1,0,1] # inversion if desired
793 # j+halfstep schedule
794 SVSHAPE1 = SVSHAPE()
795 SVSHAPE1.lims = [xdim, 0b0000010, 0]
796 SVSHAPE1.mode = 0b11
797 SVSHAPE1.submode2 = 0b11
798 SVSHAPE1.skip = 0b01
799 SVSHAPE1.offset = 0 # experiment with different offset, here
800 SVSHAPE1.invxyz = [1,0,1] # inversion if desired
801
802 # enumerate over the iterator function, getting new indices
803 schedule = []
804 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
805 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
806 for idx, (jl, jh) in enumerate(zip(i0, i1)):
807 schedule.append((jl, jh))
808 if jl[1] == 0b111: # end
809 break
810
811 # ok now pretty-print the results, with some debug output
812 print ("outer i-dct butterfly")
813 pprint_schedule_outer(schedule, n)
814
815 ################
816 # INNER butterfly
817 ################
818
819 # j schedule
820 SVSHAPE0 = SVSHAPE()
821 SVSHAPE0.lims = [xdim, 0b000001, 0]
822 SVSHAPE0.mode = 0b11
823 SVSHAPE0.submode2 = 0b11
824 SVSHAPE0.skip = 0b00
825 SVSHAPE0.offset = 0 # experiment with different offset, here
826 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
827 # j+halfstep schedule
828 SVSHAPE1 = SVSHAPE()
829 SVSHAPE1.lims = [xdim, 0b000001, 0]
830 SVSHAPE1.mode = 0b11
831 SVSHAPE1.submode2 = 0b11
832 SVSHAPE1.skip = 0b01
833 SVSHAPE1.offset = 0 # experiment with different offset, here
834 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
835
836 # enumerate over the iterator function, getting new indices
837 schedule = []
838 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
839 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
840 for idx, (jl, jh) in enumerate(zip(i0, i1)):
841 schedule.append((jl, jh))
842 if jl[1] == 0b111: # end
843 break
844
845 # ok now pretty-print the results, with some debug output
846 print ("inner butterfly")
847 pprint_schedule(schedule, n)
848 print ("")
849
850 return
851
852 # for DCT half-swap LDs
853 # j schedule
854 SVSHAPE0 = SVSHAPE()
855 SVSHAPE0.lims = [xdim, 0b000101, zdim]
856 SVSHAPE0.mode = 0b01
857 SVSHAPE0.submode2 = 0b01
858 SVSHAPE0.skip = 0
859 SVSHAPE0.offset = 0 # experiment with different offset, here
860 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
861
862 # expected results
863 levels = n.bit_length() - 1
864 avi = list(range(n))
865 ri = [reverse_bits(i, levels) for i in range(n)]
866 av = halfrev2(avi, False)
867 av = [av[ri[i]] for i in range(n)]
868
869 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
870 for idx, (jl) in enumerate(i0):
871 print ("inverse half-swap ld", idx, jl, av[idx])
872 if jl[1] == 0b111: # end
873 break
874
875
876 def demo_dct():
877 # set the dimension sizes here
878 n = 8
879 xdim = n
880 ydim = 0 # not needed
881 zdim = 0 # again, not needed
882
883
884 ################
885 # INNER butterfly
886 ################
887
888 # set up an SVSHAPE
889 class SVSHAPE:
890 pass
891 # j schedule
892 SVSHAPE0 = SVSHAPE()
893 SVSHAPE0.lims = [xdim, 0b000001, zdim]
894 SVSHAPE0.submode2 = 0b010
895 SVSHAPE0.mode = 0b01
896 SVSHAPE0.skip = 0b00
897 SVSHAPE0.offset = 0 # experiment with different offset, here
898 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
899 # j+halfstep schedule
900 SVSHAPE1 = SVSHAPE()
901 SVSHAPE1.lims = [xdim, 0b000001, zdim]
902 SVSHAPE1.submode2 = 0b010
903 SVSHAPE1.mode = 0b01
904 SVSHAPE1.skip = 0b01
905 SVSHAPE1.offset = 0 # experiment with different offset, here
906 SVSHAPE1.invxyz = [0,0,0] # inversion if desired
907
908 # enumerate over the iterator function, getting new indices
909 schedule = []
910 i0 = iterate_dct_inner_butterfly_indices(SVSHAPE0)
911 i1 = iterate_dct_inner_butterfly_indices(SVSHAPE1)
912 for idx, (jl, jh) in enumerate(zip(i0, i1)):
913 schedule.append((jl, jh))
914 if jl[1] == 0b111: # end
915 break
916
917 # ok now pretty-print the results, with some debug output
918 print ("inner butterfly")
919 pprint_schedule(schedule, n)
920 print ("")
921
922 ################
923 # outer butterfly
924 ################
925
926 # j schedule
927 SVSHAPE0 = SVSHAPE()
928 SVSHAPE0.lims = [xdim, 0b000010, zdim]
929 SVSHAPE0.mode = 0b01
930 SVSHAPE0.submode2 = 0b100
931 SVSHAPE0.skip = 0b10
932 SVSHAPE0.offset = 0 # experiment with different offset, here
933 SVSHAPE0.invxyz = [1,0,0] # inversion if desired
934 # j+halfstep schedule
935 SVSHAPE1 = SVSHAPE()
936 SVSHAPE1.lims = [xdim, 0b000010, zdim]
937 SVSHAPE1.mode = 0b01
938 SVSHAPE1.submode2 = 0b100
939 SVSHAPE1.skip = 0b11
940 SVSHAPE1.offset = 0 # experiment with different offset, here
941 SVSHAPE1.invxyz = [1,0,0] # inversion if desired
942
943 # enumerate over the iterator function, getting new indices
944 schedule = []
945 i0 = iterate_dct_outer_butterfly_indices(SVSHAPE0)
946 i1 = iterate_dct_outer_butterfly_indices(SVSHAPE1)
947 for idx, (jl, jh) in enumerate(zip(i0, i1)):
948 schedule.append((jl, jh))
949 if jl[1] == 0b111: # end
950 break
951
952 # ok now pretty-print the results, with some debug output
953 print ("outer butterfly")
954 pprint_schedule_outer(schedule, n)
955
956 # for DCT half-swap LDs
957 # j schedule
958 SVSHAPE0 = SVSHAPE()
959 SVSHAPE0.lims = [xdim, 0b000101, zdim]
960 SVSHAPE0.mode = 0b01
961 SVSHAPE0.submode2 = 0
962 SVSHAPE0.skip = 0
963 SVSHAPE0.offset = 0 # experiment with different offset, here
964 SVSHAPE0.invxyz = [0,0,0] # inversion if desired
965
966 # expected results
967 levels = n.bit_length() - 1
968 avi = list(range(n))
969 ri = [reverse_bits(i, levels) for i in range(n)]
970 av = halfrev2(avi, False)
971 av = [av[ri[i]] for i in range(n)]
972
973
974 i0 = iterate_dct_inner_halfswap_loadstore(SVSHAPE0)
975 for idx, (jl) in enumerate(i0):
976 print ("inverse half-swap ld", idx, jl, av[idx])
977 if jl[1] == 0b111: # end
978 break
979
980
981 # run the demo
982 if __name__ == '__main__':
983 demo_dct()
984 demo_idct()