(no commit message)

author lkcl <lkcl@web>

Sun, 17 Apr 2022 21:58:34 +0000 (22:58 +0100)

committer IkiWiki <ikiwiki.info>

Sun, 17 Apr 2022 21:58:34 +0000 (22:58 +0100)
author lkcl <lkcl@web>
Sun, 17 Apr 2022 21:58:34 +0000 (22:58 +0100)
committer IkiWiki <ikiwiki.info>
Sun, 17 Apr 2022 21:58:34 +0000 (22:58 +0100)
diff --git a/openpower/sv/bitmanip/appendix.mdwn b/openpower/sv/bitmanip/appendix.mdwn

index 359e94f1f41a3a7def77194c94ebee16b079d050..4b8e20f72658be18a7b6e8ffd743ba7cdd7f8640 100644 (file)
--- a/openpower/sv/bitmanip/appendix.mdwn
+++ b/openpower/sv/bitmanip/appendix.mdwn
@@ -126,6 +126,14 @@ Transformation of 4-in, 2-out into a pair of operations:
  
  <img src="/openpower/sv/weirdmuladd.jpg" width=800 />
  
+A trick used in the DCT and FFT twin-butterfly instructions,
+originally borrowed from `lq` and LD/ST-with-update, is to
+have a second hidden (implicit) destination register, RS.
+RS is calculated as RT+VL, where all scalar operations
+assume VL=1.  With `sv.msubx` *creating* a pair of Vector
+results, `sv.weirdaddx` correspondingly has to pick the
+pair up in order to carry on the algorithm.
+
  **msubx RT, RA, RB, RC** (RS=RT+VL for SVP64, RS=RT+1 for scalar)
  
      prod[0:127] = (RA) * (RB)
@@ -146,12 +154,16 @@ These two combine as, simply:
  
      # RS=RT+VL, assume VL=8, therefore RS starts at r8.v
      # q       : r16
-    # dividend: r24.v
-    # divisor : r32.v
+    # dividend: r20.v
+    # divisor : r28.v
      # carry   : r17
-    li r40, 0
-    sv.msubx r0.v, r16, r24.v, r32.v
-    sv.weirdaddx r0.v, r40, r8.v
+    li r17, 0
+    sv.msubx r0.v, r16, r20.v, r28.v
+    sv.weirdaddx r0.v, r17, r8.v
+
+As a result, a big-integer subtract and multiply may be carried out
+in only 3 instructions, one of which is setting a scalar integer to
+zero.
  
  ## EXT004 Opcode map
author	lkcl <lkcl@web>
	Sun, 17 Apr 2022 21:58:34 +0000 (22:58 +0100)
committer	IkiWiki <ikiwiki.info>
	Sun, 17 Apr 2022 21:58:34 +0000 (22:58 +0100)