src/openpower/decoder/isa/test_caller_svp64_dct.py

   1 from nmigen import Module, Signal
   2 from nmigen.back.pysim import Simulator, Delay, Settle
   3 from nmutil.formaltest import FHDLTestCase
   4 import unittest
   5 from openpower.decoder.power_decoder import (create_pdecode)
   6 from openpower.simulator.program import Program
   7 from openpower.decoder.isa.caller import SVP64State
   8 from openpower.decoder.selectable_int import SelectableInt
   9 from openpower.decoder.isa.test_caller import run_tst
  10 from openpower.sv.trans.svp64 import SVP64Asm
  11 from copy import deepcopy
  12 from openpower.decoder.helpers import fp64toselectable, SINGLE
  13 from openpower.decoder.isafunctions.double2single import DOUBLE2SINGLE
  14
  15
  16 class DCTTestCase(FHDLTestCase):
  17
  18     def _check_regs(self, sim, expected):
  19         for i in range(32):
  20             self.assertEqual(sim.gpr(i), SelectableInt(expected[i], 64))
  21
  22     def test_sv_ffadds_dct(self):
  23         """>>> lst = ["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
  24                         ]
  25             four in-place vector adds, four in-place vector mul-subs
  26
  27             SVP64 "DCT" mode will *automatically* offset FRB and an implicit
  28             FRS to perform the two multiplies.  one add, one subtract.
  29
  30             sv.fdadds FRT, FRA, FRC, FRB  actually does:
  31                 fadds FRT   , FRB, FRA
  32                 fsubs FRT+vl, FRA, FRB+vl
  33         """
  34         lst = SVP64Asm(["sv.fdmadds 0.v, 0.v, 0.v, 8.v"
  35                         ])
  36         lst = list(lst)
  37
  38         # cheat here with these values, they're selected so that
  39         # rounding errors do not occur. sigh.
  40         fprs = [0] * 32
  41         av = [7.0, -0.8, 2.0, -2.3] # first half of array 0..3
  42         bv = [-2.0, 2.0, -0.8, 1.4] # second half of array 4..7
  43         cv = [-1.0, 0.5, 2.5, -0.25]  # coefficients
  44         res = []
  45         # work out the results with the twin add-sub
  46         for i, (a, b, c) in enumerate(zip(av, bv, cv)):
  47             fprs[i+0] = fp64toselectable(a)
  48             fprs[i+4] = fp64toselectable(b)
  49             fprs[i+8] = fp64toselectable(c)
  50             # this isn't quite a perfect replication of the
  51             # FP32 mul-add-sub.  better really to use FPMUL32, FPADD32
  52             # and FPSUB32 directly to be honest.
  53             t = b + a
  54             diff = (b - a)
  55             diff = DOUBLE2SINGLE(fp64toselectable(diff)) # FP32 round
  56             diff = float(diff)
  57             u = diff * c
  58             tc = DOUBLE2SINGLE(fp64toselectable(t)) # convert to Power single
  59             uc = DOUBLE2SINGLE(fp64toselectable(u)) # from double
  60             res.append((tc, uc))
  61             print ("DCT", i, "in", a, b, "c", c, "res", t, u)
  62
  63         # SVSTATE (in this case, VL=2)
  64         svstate = SVP64State()
  65         svstate.vl = 4 # VL
  66         svstate.maxvl = 4 # MAXVL
  67         print ("SVSTATE", bin(svstate.asint()))
  68
  69         with Program(lst, bigendian=False) as program:
  70             sim = self.run_tst_program(program, svstate=svstate,
  71                                        initial_fprs=fprs)
  72             # confirm that the results are as expected
  73             for i, (t, u) in enumerate(res):
  74                 a = float(sim.fpr(i+0))
  75                 b = float(sim.fpr(i+4))
  76                 t = float(t)
  77                 u = float(u)
  78                 print ("DCT", i, "in", a, b, "res", t, u)
  79             for i, (t, u) in enumerate(res):
  80                 self.assertEqual(sim.fpr(i+0), t)
  81                 self.assertEqual(sim.fpr(i+4), u)
  82
  83     def tst_sv_remap_fpmadds_dct(self):
  84         """>>> lst = ["svshape 4, 1, 1, 2, 0",
  85                      "svremap 31, 1, 0, 2, 0, 1, 0",
  86                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
  87                      ]
  88             runs a full in-place O(N log2 N) butterfly schedule for
  89             DCT
  90
  91             SVP64 "REMAP" in Butterfly Mode is applied to a twin +/- FMAC
  92             (3 inputs, 2 outputs)
  93         """
  94         lst = SVP64Asm( ["svshape 4, 1, 1, 2, 0",
  95                          "svremap 31, 1, 0, 2, 0, 1, 0",
  96                         "sv.ffmadds 0.v, 0.v, 0.v, 8.v"
  97                         ])
  98         lst = list(lst)
  99
 100         # array and coefficients to test
 101         av = [7.0, -9.8, 3.0, -32.3]
 102         coe = [-0.25, 0.5, 3.1, 6.2, 0.1, -0.2] # 6 coefficients
 103
 104         # store in regfile
 105         fprs = [0] * 32
 106         for i, c in enumerate(coe):
 107             fprs[i+8] = fp64toselectable(c)
 108         for i, a in enumerate(av):
 109             fprs[i+0] = fp64toselectable(a)
 110
 111         with Program(lst, bigendian=False) as program:
 112             sim = self.run_tst_program(program, initial_fprs=fprs)
 113             print ("spr svshape0", sim.spr['SVSHAPE0'])
 114             print ("    xdimsz", sim.spr['SVSHAPE0'].xdimsz)
 115             print ("    ydimsz", sim.spr['SVSHAPE0'].ydimsz)
 116             print ("    zdimsz", sim.spr['SVSHAPE0'].zdimsz)
 117             print ("spr svshape1", sim.spr['SVSHAPE1'])
 118             print ("spr svshape2", sim.spr['SVSHAPE2'])
 119             print ("spr svshape3", sim.spr['SVSHAPE3'])
 120
 121             return
 122
 123             # work out the results with the twin mul/add-sub
 124             res = transform_radix2(av, coe)
 125
 126             for i, expected in enumerate(res):
 127                 print ("i", i, float(sim.fpr(i)), "expected", expected)
 128             for i, expected in enumerate(res):
 129                 # convert to Power single
 130                 expected = DOUBLE2SINGLE(fp64toselectable(expected))
 131                 expected = float(expected)
 132                 actual = float(sim.fpr(i))
 133                 # approximate error calculation, good enough test
 134                 # reason: we are comparing FMAC against FMUL-plus-FADD-or-FSUB
 135                 # and the rounding is different
 136                 err = abs(actual - expected) / expected
 137                 self.assertTrue(err < 1e-7)
 138
 139     def run_tst_program(self, prog, initial_regs=None,
 140                               svstate=None,
 141                               initial_mem=None,
 142                               initial_fprs=None):
 143         if initial_regs is None:
 144             initial_regs = [0] * 32
 145         simulator = run_tst(prog, initial_regs, mem=initial_mem,
 146                                                 initial_fprs=initial_fprs,
 147                                                 svstate=svstate)
 148
 149         print ("GPRs")
 150         simulator.gpr.dump()
 151         print ("FPRs")
 152         simulator.fpr.dump()
 153
 154         return simulator
 155
 156
# run the test cases in this module when the file is executed as a script
if __name__ == "__main__":
    unittest.main()