From 9a5546ea59d4fc6610f93482dc58501dd69eccac Mon Sep 17 00:00:00 2001
From: Konstantinos Margaritis <konstantinos.margaritis@vectorcamp.gr>
Date: Sun, 7 May 2023 09:55:15 +0000
Subject: [PATCH] Add description for maddrs 2-coeff butterfly instruction

---
 openpower/sv/twin_butterfly.mdwn | 63 +++++++++++++++++++++++++++++++-
 1 file changed, 61 insertions(+), 2 deletions(-)

diff --git a/openpower/sv/twin_butterfly.mdwn b/openpower/sv/twin_butterfly.mdwn
index a2a7b8d7f..f531615a0 100644
--- a/openpower/sv/twin_butterfly.mdwn
+++ b/openpower/sv/twin_butterfly.mdwn
@@ -128,8 +128,6 @@ Pseudo-code:
         RS <- (res2 | smask2)
 ```
 
-Note that if Rc=1 an Illegal Instruction is raised.  Rc=1 is `RESERVED`
-
 Similar to `RTp`, this instruction produces an implicit result, `RS`,
 which under Scalar circumstances is defined as `RT+1`.  For SVP64 if
 `RT` is a Vector, `RS` begins immediately after the Vector `RT` where
@@ -141,6 +139,67 @@ Special Registers Altered:
     None
 ```
 
+# [DRAFT] Integer Butterfly Multiply Add/Sub and Accumulate FFT/DCT
+
+A-Form
+
+* maddrs  RT,RA,SH,RB
+
+Pseudo-code:
+
+    n <- SH
+    prod <- MULS(RB, RA)
+    prod_lo <- prod[XLEN:(XLEN*2)-1]
+    if n = 0 then
+        RT <- (RT) + prod_lo
+        RS <- (RS) - prod_lo
+    else
+        res1 <- (RT) + prod_lo
+        res2 <- (RS) - prod_lo
+        round <- [0]*XLEN
+        round[XLEN -n] <- 1
+        res1 <- res1 + round
+        res2 <- res2 + round
+        signbit1 <- res1[0]
+        signbit2 <- res2[0]
+        m <- MASK(n, (XLEN-1))
+        res1 <- ROTL64(res1, XLEN-n) & m
+        res2 <- ROTL64(res2, XLEN-n) & m
+        smask1 <- ([signbit1]*XLEN) & ¬m
+        smask2 <- ([signbit2]*XLEN) & ¬m
+        RT <- (res1 | smask1)
+        RS <- (res2 | smask2)
+
+Special Registers Altered:
+
+    None
+
+Similar to `RTp`, this instruction produces an implicit result, `RS`,
+which under Scalar circumstances is defined as `RT+1`.  For SVP64 if
+`RT` is a Vector, `RS` begins immediately after the Vector `RT` where
+the length of `RT` is set by `SVSTATE.MAXVL` (Max Vector Length).
+
+This instruction is intended to be used together with `maddsubrs`
+to produce the double-coefficient butterfly operation. For that to work,
+`c2-c1` must be passed as the coefficient to `maddrs` instead of `c2`.
+
+In essence, we first calculate the quantity `a * c1 +/- b * c1` with
+`maddsubrs` *without* shifting (so `SH=0`), then `maddrs` adds `b * (c2-c1)` to
+`RT` and subtracts it from `RS`, giving `a * c1 +/- b * c2`, and *then* shifts.
+
+In the following example, assume `a` in `R1`, `b` in `R10`, `c1` in `R11` and `c2 - c1` in `R12`.
+The first instruction will put `a * c1 + b * c1` in `R1` (`RT`) and `a * c1 - b * c1` in `RS`
+(here, `RS = RT+1`, so `R2`).
+Then, `maddrs` will add `b * (c2 - c1)` to `R1` (`RT`), subtract it from `R2` (`RS`), and then
+round and shift both quantities right by 14 bits:
+
+```
+    maddsubrs 1,10,0,11
+    maddrs 1,10,14,12
+```
+
+In scalar code, that would take ~16 instructions for both operations.
+
 -------
 
 \newpage{}
-- 
2.30.2