From fb879247e5d61ef19800995e03f834b76f4bb44e Mon Sep 17 00:00:00 2001 From: lkcl Date: Sat, 4 Jun 2022 17:26:33 +0100 Subject: [PATCH] --- openpower/sv/remap.mdwn | 84 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/openpower/sv/remap.mdwn b/openpower/sv/remap.mdwn index 6b6a02179..07d6c57ae 100644 --- a/openpower/sv/remap.mdwn +++ b/openpower/sv/remap.mdwn @@ -218,10 +218,92 @@ Fields: * **SVxd** - SV REMAP "xdim" * **SVyd** - SV REMAP "ydim" -* **SVMM** - SV REMAP Mode (0b00000 for Matrix, 0b00001 for FFT) +* **SVRM** - SV REMAP Mode (0b00000 for Matrix, 0b00001 for FFT) * **vf** - sets "Vertical-First" mode * **XO** - standard 5-bit XO field +``` +# set schedule up for multiply +if (SVRM = 0b0000) then + # VL in Matrix Multiply is xd*yd*zd + n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd) + vlen[0:6] <- n[14:20] + # set up template in SVSHAPE0, then copy to 1-3 + SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim + SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim + SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim + SVSHAPE0[28:29] <- 0b11 # skip z + # copy + SVSHAPE1[0:31] <- SVSHAPE0[0:31] + SVSHAPE2[0:31] <- SVSHAPE0[0:31] + SVSHAPE3[0:31] <- SVSHAPE0[0:31] + # set up FRA + SVSHAPE1[18:20] <- 0b001 # permute x,z,y + SVSHAPE1[28:29] <- 0b01 # skip z + # FRC + SVSHAPE2[18:20] <- 0b001 # permute x,z,y + SVSHAPE2[28:29] <- 0b11 # skip y +# set schedule up for FFT butterfly +if (SVRM = 0b0001) then + # calculate O(N log2 N) + n <- [0] * 3 + do while n < 5 + if SVxd[4-n] = 0 then + leave + n <- n + 1 + n <- ((0b0 || SVxd) + 1) * n + vlen[0:6] <- n[1:7] + # set up template in SVSHAPE0, then copy to 1-3 + # for FRA and FRT + SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim + SVSHAPE0[30:31] <- 0b01 # Butterfly mode + # copy + SVSHAPE1[0:31] <- SVSHAPE0[0:31] + SVSHAPE2[0:31] <- SVSHAPE0[0:31] + # set up FRB and FRS + SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule + # FRC (coefficients) + SVSHAPE2[28:29] <- 0b10 # k schedule +# set schedule up for 
(i)DCT Inner butterfly +if ((SVRM = 0b0010) | (SVRM = 0b0100) | + (SVRM = 0b1010) | (SVRM = 0b1100)) then + # calculate O(N log2 N) + n <- [0] * 3 + do while n < 5 + if SVxd[4-n] = 0 then + leave + n <- n + 1 + n <- ((0b0 || SVxd) + 1) * n + vlen[0:6] <- n[1:7] + # set up template in SVSHAPE0, then copy to 1-3 + # set up FRB and FRS + SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim + if (SVRM = 0b1010) | (SVRM = 0b1100) then + SVSHAPE0[30:31] <- 0b11 # iDCT mode + SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode + else + SVSHAPE0[30:31] <- 0b01 # DCT mode + SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode + SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop + if (SVRM = 0b1100) | (SVRM = 0b0100) then + SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4 + else + SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2 +# set schedule up for iDCT / DCT inverse of half-swapped ordering +if (SVRM = 0b0110) | (SVRM = 0b1110) | (SVRM = 0b1111) then + vlen[0:6] <- (0b00 || SVxd) + 0b0000001 +``` + + | 0b0010 | DCT Inner butterfly, pre-calculated coefficients | + | 0b1010 | iDCT Inner butterfly, pre-calculated coefficients | + | 0b0100 | DCT Inner butterfly, on-the-fly (Vertical-First Mode) | + | 0b1100 | iDCT Inner butterfly, on-the-fly (Vertical-First Mode) | + | 0b0011 | DCT Outer butterfly | + | 0b1011 | (i)DCT Outer butterfly | + | 0b0101 | iDCT COS table generation | + | 0b1101 | DCT COS table generation | + + # 4x4 Matrix to vec4 Multiply Example The following settings will allow a 4x4 matrix (starting at f8), expressed -- 2.30.2