<!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
<!-- These are not described in book 1 -->
+# svstep
+
+SVL-Form
+
+* svstep RT,SVi,vf
+* svstep. RT,SVi,vf
+
+Pseudo-code:
+
+ step <- SVSTATE_NEXT(SVi, vf)
+ RT <- [0]*57 || step
+
+Special Registers Altered:
+
+ CR0 (if Rc=1)
+
# setvl
SVL-Form
-* setvl RT, RA, SVi, vf, vs, ms
-* setvl. RT, RA, SVi, vf, vs, ms
+* setvl RT,RA,SVi,vf,vs,ms
+* setvl. RT,RA,SVi,vf,vs,ms
Pseudo-code:
if (vf & (¬vs) & ¬(ms)) = 1 then
- step <- SVSTATE_NEXT(SVi)
+ step <- SVSTATE_NEXT(SVi, 0b0)
if _RT != 0b00000 then
GPR(_RT) <- [0]*57 || step
else
SVSTATE[7:13] <- VL
if _RT != 0b00000 then
GPR(_RT) <- [0]*57 || VL
+ # set requested Vertical-First mode, clear persist
SVSTATE[63] <- vf
+ SVSTATE[62] <- 0b0
Special Registers Altered:
SVRM-Form
-* svremap SVme, mi0, mi1, mi2, mo0, mo1, pst
+* svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
Pseudo-code:
SVM-Form
-* svshape SVxd, SVyd, SVzd, SVRM, vf
+* svshape SVxd,SVyd,SVzd,SVRM,vf
Pseudo-code:
# for convenience, VL to be calculated and stored in SVSTATE
vlen <- [0] * 7
+ itercount[0:6] <- [0] * 7
SVSTATE[0:31] <- [0] * 32
# only overwrite REMAP if "persistence" is zero
if (SVSTATE[62] = 0b0) then
SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
# FRC (coefficients)
SVSHAPE2[28:29] <- 0b10 # k schedule
- # set schedule up for DCT Inner butterfly
- if (SVRM = 0b0010) then
+ # set schedule up for (i)DCT Inner butterfly
+ # SVRM Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
+ # SVRM Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
+ if ((SVRM = 0b0010) | (SVRM = 0b0100) |
+ (SVRM = 0b1010) | (SVRM = 0b1100)) then
# calculate O(N log2 N)
n <- [0] * 3
do while n < 5
# set up template in SVSHAPE0, then copy to 1-3
# set up FRB and FRS
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
- SVSHAPE0[30:31] <- 0b01 # Butterfly mode
- SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
- SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
+ if (SVRM = 0b1010) | (SVRM = 0b1100) then
+ SVSHAPE0[30:31] <- 0b11 # iDCT mode
+ SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
+ else
+ SVSHAPE0[30:31] <- 0b01 # DCT mode
+ SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
+ SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
+ if (SVRM = 0b1100) | (SVRM = 0b0100) then
+ SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
+ else
+ SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ if (SVRM != 0b0100) & (SVRM != 0b1100) then
+ SVSHAPE3[0:31] <- SVSHAPE0[0:31]
# for FRA and FRT
SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
- # set schedule up for DCT Outer butterfly
- if (SVRM = 0b0011) then
+ # for cos coefficient
+ SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
+ if (SVRM != 0b0100) & (SVRM != 0b1100) then
+ SVSHAPE3[28:29] <- 0b11 # size schedule
+ # set schedule up for (i)DCT Outer butterfly
+ if (SVRM = 0b0011) | (SVRM = 0b1011) then
# calculate O(N log2 N) number of outer butterfly overlapping adds
vlen[0:6] <- [0] * 7
n <- 0b000
# set up template in SVSHAPE0, then copy to 1-3
# set up FRB and FRS
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
- SVSHAPE0[30:31] <- 0b01 # Butterfly mode
- SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
+ if (SVRM = 0b1011) then
+ SVSHAPE0[30:31] <- 0b11 # iDCT mode
+ SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
+ SVSHAPE0[21:23] <- 0b101 # "inverse" on outer and inner loop
+ else
+ SVSHAPE0[30:31] <- 0b01 # DCT mode
+ SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
+ SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
SVSHAPE2[0:31] <- SVSHAPE0[0:31]
- SVSHAPE3[0:31] <- SVSHAPE0[0:31]
# for FRA and FRT
SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
+ # set schedule up for (i)DCT COS table generation
+ if (SVRM = 0b0101) | (SVRM = 0b1101) then
+ # calculate O(N log2 N)
+ vlen[0:6] <- [0] * 7
+ itercount[0:6] <- (0b00 || SVxd) + 0b0000001
+ itercount[0:6] <- (0b0 || itercount[0:5])
+ n <- [0] * 3
+ do while n < 5
+ if SVxd[4-n] = 0 then
+ leave
+ n <- n + 1
+ vlen[0:6] <- vlen + itercount
+ itercount[0:6] <- (0b0 || itercount[0:5])
+ # set up template in SVSHAPE0, then copy to 1-2
+ # set up FRB and FRS
+ SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
+ SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
+ if (SVRM = 0b0101) then
+ SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop for DCT
+ # copy
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
# for cos coefficient
- SVSHAPE2[28:29] <- 0b10 # ci schedule
- SVSHAPE3[28:29] <- 0b11 # size schedule
+ SVSHAPE1[28:29] <- 0b10 # ci schedule
+ SVSHAPE2[28:29] <- 0b11 # size schedule
+ # set schedule up for iDCT / DCT inverse of half-swapped ordering
+ if (SVRM = 0b0110) | (SVRM = 0b1110) | (SVRM = 0b1111) then
+ vlen[0:6] <- (0b00 || SVxd) + 0b0000001
+ # set up template in SVSHAPE0
+ SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ if (SVRM = 0b1110) then
+ SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
+ if (SVRM = 0b1111) then
+ SVSHAPE0[30:31] <- 0b01 # FFT mode
+ else
+ SVSHAPE0[30:31] <- 0b11 # DCT mode
+ SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
# set VL, MVL and Vertical-First
SVSTATE[0:6] <- vlen
SVSTATE[7:13] <- vlen