From 9a5546ea59d4fc6610f93482dc58501dd69eccac Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Sun, 7 May 2023 09:55:15 +0000 Subject: [PATCH] Add description for maddrs 2-coeff butterfly instruction --- openpower/sv/twin_butterfly.mdwn | 63 +++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 2 deletions(-) diff --git a/openpower/sv/twin_butterfly.mdwn b/openpower/sv/twin_butterfly.mdwn index a2a7b8d7f..f531615a0 100644 --- a/openpower/sv/twin_butterfly.mdwn +++ b/openpower/sv/twin_butterfly.mdwn @@ -128,8 +128,6 @@ Pseudo-code: RS <- (res2 | smask2) ``` -Note that if Rc=1 an Illegal Instruction is raised. Rc=1 is `RESERVED` - Similar to `RTp`, this instruction produces an implicit result, `RS`, which under Scalar circumstances is defined as `RT+1`. For SVP64 if `RT` is a Vector, `RS` begins immediately after the Vector `RT` where @@ -141,6 +139,67 @@ Special Registers Altered: None ``` +# [DRAFT] Integer Butterfly Multiply Add/Sub and Accumulate FFT/DCT + +A-Form + +* maddrs RT,RA,SH,RB + +Pseudo-code: + + n <- SH + prod <- MULS(RB, RA) + prod_lo <- prod[XLEN:(XLEN*2)-1] + if n = 0 then + RT <- (RT) + prod_lo + RS <- (RS) - prod_lo + else + res1 <- (RT) + prod_lo + res2 <- (RS) - prod_lo + round <- [0]*XLEN + round[XLEN -n] <- 1 + res1 <- res1 + round + res2 <- res2 + round + signbit1 <- res1[0] + signbit2 <- res2[0] + m <- MASK(n, (XLEN-1)) + res1 <- ROTL64(res1, XLEN-n) & m + res2 <- ROTL64(res2, XLEN-n) & m + smask1 <- ([signbit1]*XLEN) & ¬m + smask2 <- ([signbit2]*XLEN) & ¬m + RT <- (res1 | smask1) + RS <- (res2 | smask2) + +Special Registers Altered: + + None + +Similar to `RTp`, this instruction produces an implicit result, `RS`, +which under Scalar circumstances is defined as `RT+1`. For SVP64 if +`RT` is a Vector, `RS` begins immediately after the Vector `RT` where +the length of `RT` is set by `SVSTATE.MAXVL` (Max Vector Length). 
+ +This instruction is intended to be used together with `maddsubrs` +to produce the double-coefficient butterfly operation. For that +to work, `c2-c1` must be passed as the coefficient instead of `c2`. + +In essence, we first calculate the quantity `a * c1 +/- b * c1` with +`maddsubrs` *without* shifting (so `SH=0`), then add `b * (c2-c1)` to the previous +`RT` and subtract it from the previous `RS`, and only *then* do the shifting. + +In the following example, assume `a` is in `R1`, `b` in `R10`, `c1` in `R11` and `c2 - c1` in `R12`. +The first instruction will put `a * c1 + b * c1` in `R1` (`RT`) and `a * c1 - b * c1` in `RS` +(here, `RS = RT+1`, so `R2`). +Then, `maddrs` will add `b * (c2 - c1)` to `R1` (`RT`), subtract it from `R2` (`RS`), and then +round and shift right both quantities by 14 bits, leaving the rounded, shifted values of `a * c1 + b * c2` in `R1` and `a * c1 - b * c2` in `R2`: + +``` + maddsubrs 1,10,0,11 + maddrs 1,10,14,12 +``` + +In scalar code, that would take ~16 instructions for both operations. + ------- \newpage{} -- 2.30.2