src/openpower/decoder/isa/test_caller_svp64_fft.py

   1 from nmigen import Module, Signal
   2 from nmigen.back.pysim import Simulator, Delay, Settle
   3 from nmutil.formaltest import FHDLTestCase
   4 import unittest
   5 from openpower.decoder.isa.caller import ISACaller
   6 from openpower.decoder.power_decoder import (create_pdecode)
   7 from openpower.decoder.power_decoder2 import (PowerDecode2)
   8 from openpower.simulator.program import Program
   9 from openpower.decoder.isa.caller import ISACaller, SVP64State
  10 from openpower.decoder.selectable_int import SelectableInt
  11 from openpower.decoder.orderedset import OrderedSet
  12 from openpower.decoder.isa.all import ISA
  13 from openpower.decoder.isa.test_caller import Register, run_tst
  14 from openpower.sv.trans.svp64 import SVP64Asm
  15 from openpower.consts import SVP64CROffs
  16 from copy import deepcopy
  17 from openpower.decoder.helpers import fp64toselectable
  18 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
  19
  20
  21 def transform_radix2(vec, exptable):
  22     """
  23     # FFT and convolution test (Python), based on Project Nayuki
  24     #
  25     # Copyright (c) 2020 Project Nayuki. (MIT License)
  26     # https://www.nayuki.io/page/free-small-fft-in-multiple-languages
  27
  28     """
  29     # bits of the integer 'val'.
  30     def reverse_bits(val, width):
  31         result = 0
  32         for _ in range(width):
  33             result = (result << 1) | (val & 1)
  34             val >>= 1
  35         return result
  36
  37     # Initialization
  38     n = len(vec)
  39     levels = n.bit_length() - 1
  40
  41     # Copy with bit-reversed permutation
  42     #vec = [vec[reverse_bits(i, levels)] for i in range(n)]
  43
  44     size = 2
  45     while size <= n:
  46         halfsize = size // 2
  47         tablestep = n // size
  48         for i in range(0, n, size):
  49             k = 0
  50             for j in range(i, i + halfsize):
  51                 # exact same actual computation, just embedded in
  52                 # triple-nested for-loops
  53                 jl, jh = j, j+halfsize
  54                 vjh = vec[jh]
  55                 temp1 = vec[jh] * exptable[k]
  56                 temp2 = vec[jl]
  57                 vec[jh] = temp2 - temp1
  58                 vec[jl] = temp2 + temp1
  59                 print ("xform jl jh k", jl, jh, k,
  60                        "vj vjh ek", temp2, vjh, exptable[k],
  61                        "t1, t2", temp1, temp2,
  62                        "v[jh] v[jl]", vec[jh], vec[jl])
  63                 k += tablestep
  64         size *= 2
  65
  66     return vec
  67
  68
  69 class DecoderTestCase(FHDLTestCase):
  70
  71     def _check_regs(self, sim, expected):
  72         for i in range(32):
  73             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
  74
  75     def test_sv_remap_fpmadds_fft(self):
  76         """>>> lst = ["svremap 8, 1, 1, 1",
  77                       "sv.ffmadds 2.v, 2.v, 2.v, 10.v"
  78                      ]
  79             runs a full in-place O(N log2 N) butterfly schedule for
  80             Discrete Fourier Transform.
  81
  82             this is the twin "butterfly" mul-add-sub from Cooley-Tukey
  83             https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
  84
  85             there is the *option* to target a different location (non-in-place)
  86             just in case.
  87
  88             SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
  89             (3 inputs, 2 outputs)
  90         """
  91         lst = SVP64Asm( ["svremap 8, 1, 1, 1",
  92                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
  93                         ])
  94         lst = list(lst)
  95
  96         # array and coefficients to test
  97         av = [7.0, -9.8, 3.0, -32.3,
  98               -2.0, 5.0, -9.8, 31.3] # array 0..7
  99         coe = [-0.25, 0.5, 3.1, 6.2] # coefficients
 100
 101         # store in regfile
 102         fprs = [0] * 32
 103         for i, c in enumerate(coe):
 104             fprs[i+8] = fp64toselectable(c)
 105         for i, a in enumerate(av):
 106             fprs[i+0] = fp64toselectable(a)
 107
 108         # set total. err don't know how to calculate how many there are...
 109         # do it manually for now
 110         VL = 0
 111         size = 2
 112         n = len(av)
 113         while size <= n:
 114             halfsize = size // 2
 115             tablestep = n // size
 116             for i in range(0, n, size):
 117                 for j in range(i, i + halfsize):
 118                     VL += 1
 119             size *= 2
 120
 121         # SVSTATE (calculated VL)
 122         svstate = SVP64State()
 123         svstate.vl[0:7] = VL # VL
 124         svstate.maxvl[0:7] = VL # MAXVL
 125         print ("SVSTATE", bin(svstate.spr.asint()))
 126
 127         with Program(lst, bigendian=False) as program:
 128             sim = self.run_tst_program(program, svstate=svstate,
 129                                        initial_fprs=fprs)
 130             print ("spr svshape0", sim.spr['SVSHAPE0'])
 131             print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
 132             print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
 133             print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
 134             print ("spr svshape1", sim.spr['SVSHAPE1'])
 135             print ("spr svshape2", sim.spr['SVSHAPE2'])
 136             print ("spr svshape3", sim.spr['SVSHAPE3'])
 137
 138             # work out the results with the twin mul/add-sub
 139             res = transform_radix2(av, coe)
 140
 141             for i, expected in enumerate(res):
 142                 print ("i", i, float(sim.fpr(i)), "expected", expected)
 143             for i, expected in enumerate(res):
 144                 # convert to Power single
 145                 expected = DOUBLE2SINGLE(fp64toselectable(expected))
 146                 expected = float(expected)
 147                 actual = float(sim.fpr(i))
 148                 # approximate error calculation, good enough test
 149                 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
 150                 # and the rounding is different
 151                 err = abs(actual - expected) / expected
 152                 self.assertTrue(err < 1e-7)
 153
 154
 155     def test_sv_fpmadds_fft(self):
 156         """>>> lst = ["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
 157                         ]
 158             four in-place vector mul-adds, four in-place vector mul-subs
 159
 160             this is the twin "butterfly" mul-add-sub from Cooley-Tukey
 161             https://en.wikipedia.org/wiki/Cooley%E2%80%93Tukey_FFT_algorithm#Data_reordering,_bit_reversal,_and_in-place_algorithms
 162
 163             there is the *option* to target a different location (non-in-place)
 164             just in case.
 165
 166             SVP64 "FFT" mode will *automatically* offset FRB and an implicit
 167             FRS to perform the two multiplies.  one add, one subtract.
 168
 169             sv.ffmadds FRT, FRA, FRC, FRB  actually does:
 170                 fmadds  FRT   , FRA, FRC, FRA
 171                 fnmsubs FRT+vl, FRA, FRC, FRB+vl
 172         """
 173         lst = SVP64Asm(["sv.ffmadds 2.v, 2.v, 2.v, 10.v"
 174                         ])
 175         lst = list(lst)
 176
 177         fprs = [0] * 32
 178         av = [7.0, -9.8, 2.0, -32.3] # first half of array 0..3
 179         bv = [-2.0, 2.0, -9.8, 32.3] # second half of array 4..7
 180         coe = [-1.0, 4.0, 3.1, 6.2]  # coefficients
 181         res = []
 182         # work out the results with the twin mul/add-sub
 183         for i, (a, b, c) in enumerate(zip(av, bv, coe)):
 184             fprs[i+2] = fp64toselectable(a)
 185             fprs[i+6] = fp64toselectable(b)
 186             fprs[i+10] = fp64toselectable(c)
 187             mul = a * c
 188             t = b + mul
 189             u = b - mul
 190             t = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
 191             u = DOUBLE2SINGLE(fp64toselectable(u)) # from double
 192             res.append((t, u))
 193             print ("FFT", i, "in", a, b, "coeff", c, "mul", mul, "res", t, u)
 194
 195         # SVSTATE (in this case, VL=2)
 196         svstate = SVP64State()
 197         svstate.vl[0:7] = 4 # VL
 198         svstate.maxvl[0:7] = 4 # MAXVL
 199         print ("SVSTATE", bin(svstate.spr.asint()))
 200
 201         with Program(lst, bigendian=False) as program:
 202             sim = self.run_tst_program(program, svstate=svstate,
 203                                        initial_fprs=fprs)
 204             # confirm that the results are as expected
 205             for i, (t, u) in enumerate(res):
 206                 self.assertEqual(sim.fpr(i+2), t)
 207                 self.assertEqual(sim.fpr(i+6), u)
 208
 209     def run_tst_program(self, prog, initial_regs=None,
 210                               svstate=None,
 211                               initial_mem=None,
 212                               initial_fprs=None):
 213         if initial_regs is None:
 214             initial_regs = [0] * 32
 215         simulator = run_tst(prog, initial_regs, mem=initial_mem,
 216                                                 initial_fprs=initial_fprs,
 217                                                 svstate=svstate)
 218
 219         print ("GPRs")
 220         simulator.gpr.dump()
 221         print ("FPRs")
 222         simulator.fpr.dump()
 223
 224         return simulator
 225
 226
 227 if __name__ == "__main__":
 228     unittest.main()