From: Luke Kenneth Casson Leighton Date: Wed, 14 Jul 2021 20:37:38 +0000 (+0100) Subject: use fmadds and fmsubs in complex fft example X-Git-Tag: xlen-bcd~287 X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e7d26cf2218686c9f54ecc971827cca1d9ca73b8;p=openpower-isa.git use fmadds and fmsubs in complex fft example reduces inner loop instruction count by 2 --- diff --git a/src/openpower/decoder/isa/test_caller_svp64_fft.py b/src/openpower/decoder/isa/test_caller_svp64_fft.py index 0c5d23a1..5f92e07c 100644 --- a/src/openpower/decoder/isa/test_caller_svp64_fft.py +++ b/src/openpower/decoder/isa/test_caller_svp64_fft.py @@ -518,14 +518,14 @@ class FFTTestCase(FHDLTestCase): "svremap 5, 1, 0, 2, 0, 0", "sv.fmuls 24, 0.v, 16.v", # mul1_r = r*cos_r "svremap 5, 1, 0, 2, 0, 0", - "sv.fmuls 25, 8.v, 20.v", # mul2_r = i*sin_i - "fadds 24, 24, 25", # tpre = mul1_r + mul2_r + "sv.fmadds 24, 8.v, 20.v, 24", # mul2_r = i*sin_i + # tpre = mul1_r + mul2_r # tpim "svremap 5, 1, 0, 2, 0, 0", "sv.fmuls 26, 0.v, 20.v", # mul1_i = r*sin_i "svremap 5, 1, 0, 2, 0, 0", - "sv.fmuls 27, 8.v, 16.v", # mul2_i = i*cos_r - "fsubs 26, 27, 26", # tpim = mul2_i - mul1_i + "sv.fmsubs 26, 8.v, 16.v, 26", # mul2_i = i*cos_r + # tpim = mul2_i - mul1_i # vec_r jh/jl "svremap 26, 0, 0, 0, 0, 1", "sv.ffadds 0.v, 24, 0.v", # vh/vl +/- tpre @@ -535,7 +535,7 @@ class FFTTestCase(FHDLTestCase): # svstep loop "setvl. 0, 0, 1, 1, 0, 0", - "bc 4, 2, -84" + "bc 4, 2, -76" ]) lst = list(lst)