From d02ab4c817a9fadb28b64c4c9c89ed67da093afa Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Wed, 21 Sep 2022 15:36:29 +0100 Subject: [PATCH] add stride-multiplier for 2D DCT/FFT "in-place" offsets https://bugs.libre-soc.org/show_bug.cgi?id=930 --- src/openpower/decoder/isa/remap_dct_yield.py | 19 ++++++++++++------- src/openpower/decoder/isa/remap_fft_yield.py | 5 +++-- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/openpower/decoder/isa/remap_dct_yield.py b/src/openpower/decoder/isa/remap_dct_yield.py index c2758444..e0e899e0 100644 --- a/src/openpower/decoder/isa/remap_dct_yield.py +++ b/src/openpower/decoder/isa/remap_dct_yield.py @@ -48,8 +48,9 @@ def iterate_dct_inner_halfswap_loadstore(SVSHAPE): # get indices to iterate over, in the required order n = SVSHAPE.lims[0] mode = SVSHAPE.lims[1] + stride = SVSHAPE.lims[2] + 1 print ("inner halfswap loadstore", n, mode, SVSHAPE.skip, - "submode", SVSHAPE.submode2) + "submode", SVSHAPE.submode2, "stride", stride) # reference list for not needing to do data-swaps, just swap what # *indices* are referenced (two levels of indirection at the moment) @@ -74,7 +75,7 @@ def iterate_dct_inner_halfswap_loadstore(SVSHAPE): for i, jl in enumerate(ji): y_end = jl == ji[-1] - yield jl, (0b111 if y_end else 0b000) + yield jl * stride, (0b111 if y_end else 0b000) # python "yield" can be iterated. use this to make it clear how @@ -83,7 +84,8 @@ def iterate_dct_inner_costable_indices(SVSHAPE): # get indices to iterate over, in the required order n = SVSHAPE.lims[0] mode = SVSHAPE.lims[1] - print ("inner costable", mode) + stride = SVSHAPE.lims[2] + 1 + print ("inner costable", mode, "stride", stride) # creating lists of indices to iterate over in each dimension # has to be done dynamically, because it depends on the size # first, the size-based loop (which can be done statically) @@ -135,7 +137,7 @@ def iterate_dct_inner_costable_indices(SVSHAPE): ((y_end and z_end)<<1) | ((y_end and x_end and z_end)<<2)) - yield result + SVSHAPE.offset, loopends + yield (result * stride) + SVSHAPE.offset, loopends k += 1 # python "yield" can be iterated. use this to make it clear how @@ -144,7 +146,9 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE): # get indices to iterate over, in the required order n = SVSHAPE.lims[0] mode = SVSHAPE.lims[1] - print ("inner butterfly", mode, SVSHAPE.skip, "submode", SVSHAPE.submode2) + stride = SVSHAPE.lims[2] + 1 + print ("inner butterfly", mode, SVSHAPE.skip, + "submode", SVSHAPE.submode2, "stride", stride) # creating lists of indices to iterate over in each dimension # has to be done dynamically, because it depends on the size # first, the size-based loop (which can be done statically) @@ -234,7 +238,7 @@ def iterate_dct_inner_butterfly_indices(SVSHAPE): ((y_end and z_end)<<1) | ((y_end and x_end and z_end)<<2)) - yield result + SVSHAPE.offset, loopends + yield (result * stride) + SVSHAPE.offset, loopends k += 1 # now in-place swap @@ -255,6 +259,7 @@ def iterate_dct_outer_butterfly_indices(SVSHAPE): # get indices to iterate over, in the required order n = SVSHAPE.lims[0] mode = SVSHAPE.lims[1] + stride = SVSHAPE.lims[2] + 1 # creating lists of indices to iterate over in each dimension # has to be done dynamically, because it depends on the size # first, the size-based loop (which can be done statically) @@ -347,7 +352,7 @@ def iterate_dct_outer_butterfly_indices(SVSHAPE): ((y_end and z_end)<<1) | ((y_end and x_end and z_end)<<2)) - yield result + SVSHAPE.offset, loopends + yield (result * stride) + SVSHAPE.offset, loopends k += 1 # now in-place swap (disabled) diff --git a/src/openpower/decoder/isa/remap_fft_yield.py b/src/openpower/decoder/isa/remap_fft_yield.py index 422c2187..76349ce2 100644 --- a/src/openpower/decoder/isa/remap_fft_yield.py +++ b/src/openpower/decoder/isa/remap_fft_yield.py @@ -25,7 +25,8 @@ def iterate_butterfly_indices(SVSHAPE): # get indices to iterate over, in the required order n = SVSHAPE.lims[0] - # createing lists of indices to iterate over in each dimension + stride = SVSHAPE.lims[2] + 1 # stride-multiplier on reg access + # creating lists of indices to iterate over in each dimension # has to be done dynamically, because it depends on the size # first, the size-based loop (which can be done statically) x_r = [] @@ -78,7 +79,7 @@ def iterate_butterfly_indices(SVSHAPE): ((y_end and z_end)<<1) | ((y_end and x_end and z_end)<<2)) - yield result + SVSHAPE.offset, loopends + yield (result * stride) + SVSHAPE.offset, loopends def demo(): # set the dimension sizes here -- 2.30.2