From 40ba5d571fab9d4157d46aa5cb9a4742103962da Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 2 Jul 2021 15:41:12 +0100 Subject: [PATCH] add generator-variant to fft-test.py to demo FFT REMAP scheduling --- openpower/sv/remap_fft_yield.py | 3 ++ openpower/sv/remapfft.py | 87 +++++++++++++++++++++++++++------ 2 files changed, 76 insertions(+), 14 deletions(-) diff --git a/openpower/sv/remap_fft_yield.py b/openpower/sv/remap_fft_yield.py index e43272be5..0106c8f76 100644 --- a/openpower/sv/remap_fft_yield.py +++ b/openpower/sv/remap_fft_yield.py @@ -36,6 +36,9 @@ def iterate_indices(SVSHAPE): # invert order if requested if SVSHAPE.invxyz[SVSHAPE.order[0]]: x_r.reverse() + if len(x_r) == 0: + return + # start an infinite (wrapping) loop skip = 0 while True: diff --git a/openpower/sv/remapfft.py b/openpower/sv/remapfft.py index 03b3651d5..458c839c2 100644 --- a/openpower/sv/remapfft.py +++ b/openpower/sv/remapfft.py @@ -21,6 +21,8 @@ # import cmath, math, random +from copy import deepcopy +from remap_fft_yield import iterate_indices # @@ -66,19 +68,72 @@ def transform_radix2(vec, inverse, generators_mode): # Copy with bit-reversed permutation vec = [vec[reverse_bits(i, levels)] for i in range(n)] + # # Radix-2 decimation-in-time FFT - size = 2 - while size <= n: - halfsize = size // 2 - tablestep = n // size - for i in range(0, n, size): - k = 0 - for j in range(i, i + halfsize): - temp = vec[j + halfsize] * exptable[k] - vec[j + halfsize] = vec[j] - temp - vec[j] += temp - k += tablestep - size *= 2 + # + if generators_mode: + # loop using SVP64 REMAP "generators" + # set the dimension sizes here + + # set total. err don't know how to calculate how many there are... + # do it manually for now + VL = 0 + size = 2 + while size <= n: + halfsize = size // 2 + tablestep = n // size + for i in range(0, n, size): + for j in range(i, i + halfsize): + VL += 1 + size *= 2 + + # set up an SVSHAPE + class SVSHAPE: + pass + # j schedule + SVSHAPE0 = SVSHAPE() + SVSHAPE0.lims = [n, 0, 0] + SVSHAPE0.order = [0,1,2] + SVSHAPE0.mode = 0b00 + SVSHAPE0.offset = 0 + SVSHAPE0.invxyz = [0,0,0] # inversion if desired + # j+halfstep schedule + SVSHAPE1 = deepcopy(SVSHAPE0) + SVSHAPE1.mode = 0b01 + # k schedule + SVSHAPE2 = deepcopy(SVSHAPE0) + SVSHAPE2.mode = 0b10 + + # enumerate over the iterator function, getting 3 *different* indices + for idx, (jl, jh, k) in enumerate(zip(iterate_indices(SVSHAPE0), + iterate_indices(SVSHAPE1), + iterate_indices(SVSHAPE2))): + if idx >= VL: + break + # exact same actual computation, just embedded in a single + # for-loop but using triple generators to create the schedule + temp1 = vec[jh] * exptable[k] + temp2 = vec[jl] + vec[jh] = temp2 - temp1 + vec[jl] = temp2 + temp1 + else: + # loop using standard python nested for-loops + size = 2 + while size <= n: + halfsize = size // 2 + tablestep = n // size + for i in range(0, n, size): + k = 0 + for j in range(i, i + halfsize): + # exact same actual computation, just embedded in + # triple-nested for-loops + jl, jh = j, j+halfsize + temp1 = vec[jh] * exptable[k] + temp2 = vec[jl] + vec[jh] = temp2 - temp1 + vec[jl] = temp2 + temp1 + k += tablestep + size *= 2 return vec @@ -126,13 +181,17 @@ def main(): def _test_fft(size): input = _random_vector(size) expect = _naive_dft(input, False) - actual = transform(input, False) + actual = transform(input, False, False) + actual_generated = transform(input, False, True) + assert actual == actual_generated # check generator-version is identical + + err_gen = _log10_rms_err(actual, actual_generated) # superfluous but hey err = _log10_rms_err(expect, actual) actual = [(x / size) for x in expect] actual = transform(actual, True) err = max(_log10_rms_err(input, actual), err) - print(f"fftsize={size:4d} logerr={err:5.1f}") + print(f"fftsize={size:4d} logerr={err:5.1f} generr={err_gen:5.1f}") def _test_convolution(size): -- 2.30.2