src/openpower/decoder/isa/test_caller_svp64_fft.py

   1 from nmigen import Module, Signal
   2 from nmigen.back.pysim import Simulator, Delay, Settle
   3 from nmutil.formaltest import FHDLTestCase
   4 import unittest
   5 from openpower.decoder.power_decoder import (create_pdecode)
   6 from openpower.simulator.program import Program
   7 from openpower.decoder.isa.caller import SVP64State
   8 from openpower.decoder.selectable_int import SelectableInt
   9 from openpower.decoder.isa.test_caller import run_tst
  10 from openpower.sv.trans.svp64 import SVP64Asm
  11 from copy import deepcopy
  12 from openpower.decoder.helpers import fp64toselectable
  13 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
  14
  15
  16 def transform_radix2(vec, exptable):
  17     """
  18     # FFT and convolution test (Python), based on Project Nayuki
  19     #
  20     # Copyright (c) 2020 Project Nayuki. (MIT License)
  21     # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
  22
  23     """
  24     # bits of the integer 'val'.
  25     def reverse_bits(val, width):
  26         result = 0
  27         for _ in range(width):
  28             result = (result << 1) | (val & 1)
  29             val >>= 1
  30         return result
  31
  32     # Initialization
  33     n = len(vec)
  34     levels = n.bit_length() - 1
  35
  36     # Copy with bit-reversed permutation
  37     #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
  38
  39     size = 2
  40     while size <= n:
  41         halfsize = size // 2
  42         tablestep = n // size
  43         for i in range(0, n, size):
  44             k = 0
  45             for j in range(i, i + halfsize):
  46                 # exact same actual computation, just embedded in
  47                 # triple-nested for-loops
  48                 jl, jh = j, j+halfsize
  49                 vjh = vec[jh]
  50                 temp1 = vec[jh] * exptable[k]
  51                 temp2 = vec[jl]
  52                 vec[jh] = temp2 - temp1
  53                 vec[jl] = temp2 + temp1
  54                 print ("xform jl jh k", jl, jh, k,
  55                        "vj vjh ek", temp2, vjh, exptable[k],
  56                        "t1, t2", temp1, temp2,
  57                        "v[jh] v[jl]", vec[jh], vec[jl])
  58                 k += tablestep
  59         size *= 2
  60
  61     return vec
  62
  63
  64 class FFTTestCase(FHDLTestCase):
  65
  66     def _check_regs(self, sim, expected):
  67         for i in range(32):
  68             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
  69
  70     def test_sv_remap_fpmadds_fft(self):
  71         """>>> lst = ["svremap 8, 1, 1, 1",
  72                       "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
  73                      ]
  74             runs a full in-place O(N log2 N) butterfly schedule for
  75             Discrete Fourier Transform.
  76
  77             this is the twin "butterfly" mul-add-sub from Cooley-Tukey
  78             https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
  79
  80             there is the *option* to target a different location (non-in-place)
  81             just in case.
  82
  83             SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
  84             (3 inputs, 2 outputs)
  85         """
  86         lst = SVP64Asm( ["svremap 8, 1, 1, 1",
  87                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
  88                         ])
  89         lst = list(lst)
  90
  91         # array and coefficients to test
  92         av = [7.0, -9.8, 3.0, -32.3,
  93               -2.0, 5.0, -9.8, 31.3] # array 0..7
  94         coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
  95
  96         # store in regfile
  97         fprs = [0] * 32
  98         for i, c in enumerate(coe):
  99             fprs[i+8] = fp64toselectable(c)
 100         for i, a in enumerate(av):
 101             fprs[i+0] = fp64toselectable(a)
 102
 103         # set total. err don't know how to calculate how many there are...
 104         # do it manually for now
 105         VL = 0
 106         size = 2
 107         n = len(av)
 108         while size <= n:
 109             halfsize = size // 2
 110             tablestep = n // size
 111             for i in range(0, n, size):
 112                 for j in range(i, i + halfsize):
 113                     VL += 1
 114             size *= 2
 115
 116         # SVSTATE (calculated VL)
 117         svstate = SVP64State()
 118         svstate.vl[0:7] = VL # VL
 119         svstate.maxvl[0:7] = VL # MAXVL
 120         print ("SVSTATE", bin(svstate.spr.asint()))
 121
 122         with Program(lst, bigendian=False) as program:
 123             sim = self.run_tst_program(program, svstate=svstate,
 124                                        initial_fprs=fprs)
 125             print ("spr svshape0", sim.spr['SVSHAPE0'])
 126             print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
 127             print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
 128             print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
 129             print ("spr svshape1", sim.spr['SVSHAPE1'])
 130             print ("spr svshape2", sim.spr['SVSHAPE2'])
 131             print ("spr svshape3", sim.spr['SVSHAPE3'])
 132
 133             # work out the results with the twin mul/add-sub
 134             res = transform_radix2(av, coe)
 135
 136             for i, expected in enumerate(res):
 137                 print ("i", i, float(sim.fpr(i)), "expected", expected)
 138             for i, expected in enumerate(res):
 139                 # convert to Power single
 140                 expected = DOUBLE2SINGLE(fp64toselectable(expected))
 141                 expected = float(expected)
 142                 actual = float(sim.fpr(i))
 143                 # approximate error calculation, good enough test
 144                 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
 145                 # and the rounding is different
 146                 err = abs(actual - expected) / expected
 147                 self.assertTrue(err < 1e-7)
 148
 149     def test_sv_remap_fpmadds_fft_svstep(self):
 150         """>>> lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
 151                             "svremap 8, 1, 1, 1",
 152                             "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
 153                             "setvl. 0, 0, 0, 1, 0, 0",
 154                             "bc 4, 2, -16"
 155                             ])
 156             runs a full in-place O(N log2 N) butterfly schedule for
 157             Discrete Fourier Transform.  this version however uses
 158             SVP64 "Vertical-First" Mode and so needs an explicit
 159             branch, testing CR0.
 160
 161             SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
 162             (3 inputs, 2 outputs)
 163         """
 164         lst = SVP64Asm( ["setvl 0, 0, 11, 1, 1, 1",
 165                         "svremap 8, 1, 1, 1",
 166                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v",
 167                         "setvl. 0, 0, 0, 1, 0, 0",
 168                         "bc 4, 2, -16"
 169                         ])
 170         lst = list(lst)
 171
 172         # array and coefficients to test
 173         av = [7.0, -9.8, 3.0, -32.3,
 174               -2.0, 5.0, -9.8, 31.3] # array 0..7
 175         coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
 176
 177         # store in regfile
 178         fprs = [0] * 32
 179         for i, c in enumerate(coe):
 180             fprs[i+8] = fp64toselectable(c)
 181         for i, a in enumerate(av):
 182             fprs[i+0] = fp64toselectable(a)
 183
 184         # set total. err don't know how to calculate how many there are...
 185         # do it manually for now
 186         VL = 0
 187         size = 2
 188         n = len(av)
 189         while size <= n:
 190             halfsize = size // 2
 191             tablestep = n // size
 192             for i in range(0, n, size):
 193                 for j in range(i, i + halfsize):
 194                     VL += 1
 195             size *= 2
 196
 197         # SVSTATE (calculated VL)
 198         svstate = SVP64State()
 199         svstate.vl[0:7] = VL # VL
 200         svstate.maxvl[0:7] = VL # MAXVL
 201         print ("SVSTATE", bin(svstate.spr.asint()))
 202
 203         with Program(lst, bigendian=False) as program:
 204             sim = self.run_tst_program(program, svstate=svstate,
 205                                        initial_fprs=fprs)
 206             print ("spr svshape0", sim.spr['SVSHAPE0'])
 207             print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
 208             print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
 209             print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
 210             print ("spr svshape1", sim.spr['SVSHAPE1'])
 211             print ("spr svshape2", sim.spr['SVSHAPE2'])
 212             print ("spr svshape3", sim.spr['SVSHAPE3'])
 213
 214             # work out the results with the twin mul/add-sub
 215             res = transform_radix2(av, coe)
 216
 217             for i, expected in enumerate(res):
 218                 print ("i", i, float(sim.fpr(i)), "expected", expected)
 219             for i, expected in enumerate(res):
 220                 # convert to Power single
 221                 expected = DOUBLE2SINGLE(fp64toselectable(expected))
 222                 expected = float(expected)
 223                 actual = float(sim.fpr(i))
 224                 # approximate error calculation, good enough test
 225                 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
 226                 # and the rounding is different
 227                 err = abs(actual - expected) / expected
 228                 self.assertTrue(err < 1e-7)
 229
 230     def test_sv_fpmadds_fft(self):
 231         """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
 232                         ]
 233             four in-place vector mul-adds, four in-place vector mul-subs
 234
 235             this is the twin "butterfly" mul-add-sub from Cooley-Tukey
 236             https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
 237
 238             there is the *option* to target a different location (non-in-place)
 239             just in case.
 240
 241             SVP64 "FFT" mode will *automatically* offset FRB and an implicit
 242             FRS to perform the two multiplies.  one add, one subtract.
 243
 244             sv.ffmadds FRT, FRA, FRC, FRB  actually does:
 245                 fmadds  FRT   , FRA, FRC, FRA
 246                 fnmsubs FRT+vl, FRA, FRC, FRB+vl
 247         """
 248         lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
 249                         ])
 250         lst = list(lst)
 251
 252         fprs = [0] * 32
 253         av = [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
 254         bv = [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
 255         coe = [-1.0, 4.0, 3.1, 6.2]  # coefficients
 256         res = []
 257         # work out the results with the twin mul/add-sub
 258         for i, (a, b, c) in enumerate(zip(av, bv, coe)):
 259             fprs[i+2] = fp64toselectable(a)
 260             fprs[i+6] = fp64toselectable(b)
 261             fprs[i+10] = fp64toselectable(c)
 262             mul = a * c
 263             t = b + mul
 264             u = b - mul
 265             t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
 266             u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
 267             res.append((t, u))
 268             print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)
 269
 270         # SVSTATE (in this case, VL=2)
 271         svstate = SVP64State()
 272         svstate.vl[0:7] = 4 # VL
 273         svstate.maxvl[0:7] = 4 # MAXVL
 274         print ("SVSTATE", bin(svstate.spr.asint()))
 275
 276         with Program(lst, bigendian=False) as program:
 277             sim = self.run_tst_program(program, svstate=svstate,
 278                                        initial_fprs=fprs)
 279             # confirm that the results are as expected
 280             for i, (t, u) in enumerate(res):
 281                 self.assertEqual(sim.fpr(i+2), t)
 282                 self.assertEqual(sim.fpr(i+6), u)
 283
 284     def run_tst_program(self, prog, initial_regs=None,
 285                               svstate=None,
 286                               initial_mem=None,
 287                               initial_fprs=None):
 288         if initial_regs is None:
 289             initial_regs = [0] * 32
 290         simulator = run_tst(prog, initial_regs, mem=initial_mem,
 291                                                 initial_fprs=initial_fprs,
 292                                                 svstate=svstate)
 293
 294         print ("GPRs")
 295         simulator.gpr.dump()
 296         print ("FPRs")
 297         simulator.fpr.dump()
 298
 299         return simulator
 300
 301
 302 if __name__ == "__main__":
 303     unittest.main()