From 33b2813ff64417b513e3ac569f42ad72574eef9f Mon Sep 17 00:00:00 2001 From: Konstantinos Margaritis Date: Tue, 25 Jul 2023 08:54:27 +0000 Subject: [PATCH] fix docs for updated maddsubrs/maddrs/msubrs --- openpower/sv/twin_butterfly.mdwn | 87 +++++++++++++++++--------------- 1 file changed, 47 insertions(+), 40 deletions(-) diff --git a/openpower/sv/twin_butterfly.mdwn b/openpower/sv/twin_butterfly.mdwn index faee1c2e9..516f601f3 100644 --- a/openpower/sv/twin_butterfly.mdwn +++ b/openpower/sv/twin_butterfly.mdwn @@ -97,35 +97,30 @@ A-Form | PO | RT | RA | RB | SH | XO |Rc | ``` -* maddsubrs RT,RA,SH,RB +* maddsubrs RT,RA,RB,SH Pseudo-code: ``` n <- SH - sum <- (RT) + (RA) - diff <- (RT) - (RA) + sum <- (RT[0] || RT) + (RA[0] || RA) + diff <- (RT[0] || RT) - (RA[0] || RA) prod1 <- MULS(RB, sum) prod2 <- MULS(RB, diff) if n = 0 then - prod1_lo <- prod1[XLEN:(XLEN*2) - 1] - prod2_lo <- prod2[XLEN:(XLEN*2) - 1] + prod1_lo <- prod1[XLEN+1:(XLEN*2)] + prod2_lo <- prod2[XLEN+1:(XLEN*2)] RT <- prod1_lo RS <- prod2_lo else - round <- [0]*(XLEN*2) - round[XLEN*2 - n] <- 1 + round <- [0]*(XLEN*2 + 1) + round[XLEN*2 - n + 1] <- 1 prod1 <- prod1 + round prod2 <- prod2 + round - m <- MASK(XLEN - n - 2, XLEN - 1) - res1 <- prod1[XLEN - n:XLEN*2 - n - 1] - res2 <- prod2[XLEN - n:XLEN*2 - n - 1] - signbit1 <- prod1[0] - signbit2 <- prod2[0] - smask1 <- ([signbit1]*XLEN) & ¬m - smask2 <- ([signbit2]*XLEN) & ¬m - RT <- (res1 | smask1) - RS <- (res2 | smask2) + res1 <- prod1[XLEN - n + 1:XLEN*2 - n] + res2 <- prod2[XLEN - n + 1:XLEN*2 - n] + RT <- res1 + RS <- res2 ``` Similar to `RTp`, this instruction produces an implicit result, `RS`, @@ -139,11 +134,11 @@ Special Registers Altered: None ``` -# [DRAFT] Integer Butterfly Multiply Add/Sub and Accumulate FFT/DCT +# [DRAFT] Integer Butterfly Multiply Add and Round Shift FFT/DCT A-Form -* maddrs RT,RA,SH,RB +* maddrs RT,RA,RB,SH Pseudo-code: @@ -153,51 +148,63 @@ Pseudo-code: if n = 0 then prod_lo <- prod[XLEN:(XLEN*2) - 1] RT <- (RT) 
+ prod_lo - RS <- (RS) - prod_lo else - res1[0:XLEN*2-1] <- (EXTSXL((RT)[0], 1) || (RT)) + prod - res2[0:XLEN*2-1] <- (EXTSXL((RS)[0], 1) || (RS)) - prod + res[0:XLEN*2-1] <- (EXTSXL((RT)[0], 1) || (RT)) + prod round <- [0]*XLEN*2 round[XLEN*2 - n] <- 1 - res1 <- res1 + round - res2 <- res2 + round - signbit1 <- res1[0] - signbit2 <- res2[0] - m <- MASK(XLEN -n - 2, XLEN - 1) - res1 <- res1[XLEN - n:XLEN*2 - n -1] - res2 <- res2[XLEN - n:XLEN*2 - n -1] - smask1 <- ([signbit1]*XLEN) & ¬m - smask2 <- ([signbit2]*XLEN) & ¬m - RT <- (res1 | smask1) - RS <- (res2 | smask2) + res <- res + round + RT <- res[XLEN - n:XLEN*2 - n -1] +``` + +Special Registers Altered: + + None + +# [DRAFT] Integer Butterfly Multiply Sub and Round Shift FFT/DCT + +A-Form + +* msubrs RT,RA,RB,SH + +Pseudo-code: + +``` + n <- SH + prod <- MULS(RB, RA) + if n = 0 then + prod_lo <- prod[XLEN:(XLEN*2) - 1] + RT <- (RT) - prod_lo + else + res[0:XLEN*2-1] <- (EXTSXL((RT)[0], 1) || (RT)) - prod + round <- [0]*XLEN*2 + round[XLEN*2 - n] <- 1 + res <- res + round + RT <- res[XLEN - n:XLEN*2 - n -1] ``` Special Registers Altered: None -Similar to `RTp`, this instruction produces an implicit result, `RS`, -which under Scalar circumstances is defined as `RT+1`. For SVP64 if -`RT` is a Vector, `RS` begins immediately after the Vector `RT` where -the length of `RT` is set by `SVSTATE.MAXVL` (Max Vector Length). -This instruction is supposed to be used in complement to the maddsubrs +This pair of instructions is supposed to be used in complement to the maddsubrs to produce the double-coefficient butterfly instruction. In order for that to work, instead of passing c2 as coefficient, we have to pass c2-c1 instead. In essence, we are calculating the quantity `a * c1 +/- b * c1` first, with `maddsubrs` *without* shifting (so `SH=0`) and then we add/sub `b * (c2-c1)` -from the previous `RT`/`RS`, and *then* do the shifting. +from the previous `RT`, and *then* do the shifting. 
In the following example, assume `a` in `R1`, `b` in `R10`, `c1` in `R11` and `c2 - c1` in `R12`. The first instruction will put `a * c1 + b * c1` in `R1` (`RT`), `a * c1 - b * c1` in `RS` (here, `RS = RT +1`, so `R2`). -Then, `maddrs` will add `b * (c2 - c1)` to `R1` (`RT`), and subtract it from `R2` (`RS`), and then +Then, `maddrs` will add `b * (c2 - c1)` to `R1` (`RT`), and `msubrs` will subtract it from `R2` (`RS`), and then round shift right both quantities 14 bits: ``` - maddsubrs 1,10,0,11 + maddsubrs 1,10,11,0 - maddrs 1,10,14,12 + maddrs 1,10,12,14 + msubrs 2,10,12,14 ``` In scalar code, that would take ~16 instructions for both operations. -- 2.30.2