-<!-- This defines instructions described in PowerISA Version 3.0 B Book 1 -->
-
+<!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
<!-- These are not described in book 1 -->
+# svstep
+
+SVL-Form
+
+* svstep RT,SVi,vf
+* svstep. RT,SVi,vf
+
+Pseudo-code:
+
+ step <- SVSTATE_NEXT(SVi, vf)
+ RT <- [0]*57 || step
+
+Special Registers Altered:
+
+ CR0 (if Rc=1)
+
# setvl
SVL-Form
-* setvl RT, RA, SVi, vs, ms
-* setvl. RT, RA, SVi, vs, ms
+* setvl RT,RA,SVi,vf,vs,ms
+* setvl. RT,RA,SVi,vf,vs,ms
Pseudo-code:
- VLimm <- SVi + 1
- if vs = 1 then
- if _RA != 0 then
- VL <- (RA|0)[57:63]
- else
- VL <- VLimm[1:7]
- else
- VL <- SVSTATE[7:13]
- if ms = 1 then
- MVL <- VLimm[1:7]
+ if (vf & (¬vs) & ¬(ms)) = 1 then
+ step <- SVSTATE_NEXT(SVi, 0b0)
+ if _RT != 0b00000 then
+ GPR(_RT) <- [0]*57 || step
else
- MVL <- SVSTATE[0:6]
- if VL > MVL then
- VL = MVL
- SVSTATE[0:6] <- MVL
- SVSTATE[7:13] <- VL
- RT <- [0]*57 || VL
+ VLimm <- SVi + 1
+ if vs = 1 then
+ if _RA != 0 then
+ VL <- (RA|0)[57:63]
+ else
+ VL <- VLimm[0:6]
+ else
+ VL <- SVSTATE[7:13]
+ if ms = 1 then
+ MVL <- VLimm[0:6]
+ else
+ MVL <- SVSTATE[0:6]
+ if VL > MVL then
+ VL <- MVL
+ SVSTATE[0:6] <- MVL
+ SVSTATE[7:13] <- VL
+ if _RT != 0b00000 then
+ GPR(_RT) <- [0]*57 || VL
+ # set requested Vertical-First mode, clear persist
+ SVSTATE[63] <- vf
+ SVSTATE[62] <- 0b0
Special Registers Altered:
# svremap
+SVRM-Form
+
+* svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
+
+Pseudo-code:
+
+ # SVSHAPE0-3 indices for registers RA, RB, RC, RT, EA/FRS
+ SVSTATE[32:33] <- mi0
+ SVSTATE[34:35] <- mi1
+ SVSTATE[36:37] <- mi2
+ SVSTATE[38:39] <- mo0
+ SVSTATE[40:41] <- mo1
+ # enable bits (one each) for RA, RB, RC, RT, EA/FRS
+ SVSTATE[42:46] <- SVme
+ # persistence bit (applies to more than one instruction)
+ SVSTATE[62] <- pst
+
+Special Registers Altered:
+
+ None
+
+# svshape
+
SVM-Form
-* svremap SVxd, SVyd, SVzd, SVRM
+* svshape SVxd,SVyd,SVzd,SVRM,vf
Pseudo-code:
+ # for convenience, VL to be calculated and stored in SVSTATE
+ vlen <- [0] * 7
+ itercount[0:6] <- [0] * 7
+ SVSTATE[0:31] <- [0] * 32
+ # only overwrite REMAP if "persistence" is zero
+ if (SVSTATE[62] = 0b0) then
+ SVSTATE[32:33] <- 0b00
+ SVSTATE[34:35] <- 0b00
+ SVSTATE[36:37] <- 0b00
+ SVSTATE[38:39] <- 0b00
+ SVSTATE[40:41] <- 0b00
+ SVSTATE[42:46] <- 0b00000
+ SVSTATE[62] <- 0b0
+ SVSTATE[63] <- 0b0
# clear out all SVSHAPEs
SVSHAPE0[0:31] <- [0] * 32
SVSHAPE1[0:31] <- [0] * 32
SVSHAPE2[0:31] <- [0] * 32
SVSHAPE3[0:31] <- [0] * 32
# set schedule up for multiply
- if (SVRM = 0b00000) then
+ if (SVRM = 0b0000) then
+ # VL in Matrix Multiply is xd*yd*zd
+ n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
+ vlen[0:6] <- n[14:20]
# set up template in SVSHAPE0, then copy to 1-3
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
# FRC
SVSHAPE2[18:20] <- 0b001 # permute x,z,y
SVSHAPE2[28:29] <- 0b11 # skip y
- # set schedule up for butterfly
- if (SVRM = 0b00001) then
+ # set schedule up for FFT butterfly
+ if (SVRM = 0b0001) then
+ # calculate O(N log2 N)
+ n <- [0] * 3
+ do while n < 5
+ if SVxd[4-n] = 0 then
+ leave
+ n <- n + 1
+ n <- ((0b0 || SVxd) + 1) * n
+ vlen[0:6] <- n[1:7]
# set up template in SVSHAPE0, then copy to 1-3
# for FRA and FRT
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
# FRC (coefficients)
SVSHAPE2[28:29] <- 0b10 # k schedule
+ # set schedule up for DCT Inner butterfly
+ # SVRM Mode 2 is for pre-calculated coefficients,
+ # SVRM Mode 4 is for on-the-fly (Vertical-First Mode)
+ if (SVRM = 0b0010) | (SVRM = 0b0100) then
+ # calculate O(N log2 N)
+ n <- [0] * 3
+ do while n < 5
+ if SVxd[4-n] = 0 then
+ leave
+ n <- n + 1
+ n <- ((0b0 || SVxd) + 1) * n
+ vlen[0:6] <- n[1:7]
+ # set up template in SVSHAPE0, then copy to 1-3
+ # set up FRB and FRS
+ SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
+ #if (SVRM = 0b0100) then
+ SVSHAPE0[6:11] <- 0b000001 # DCT Inner Butterfly mode
+ SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
+ SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
+ # copy
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE3[0:31] <- SVSHAPE0[0:31]
+ # for FRA and FRT
+ SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
+ # for cos coefficient
+ SVSHAPE2[28:29] <- 0b10 # ci schedule
+ SVSHAPE3[28:29] <- 0b11 # size schedule
+ # set schedule up for DCT Outer butterfly
+ if (SVRM = 0b0011) then
+ # calculate O(N log2 N) number of outer butterfly overlapping adds
+ vlen[0:6] <- [0] * 7
+ n <- 0b000
+ size <- 0b0000001
+ itercount[0:6] <- (0b00 || SVxd) + 0b0000001
+ itercount[0:6] <- (0b0 || itercount[0:5])
+ do while n < 5
+ if SVxd[4-n] = 0 then
+ leave
+ n <- n + 1
+ count <- (itercount - 0b0000001) * size
+ vlen[0:6] <- vlen + count[7:13]
+ size[0:6] <- (size[1:6] || 0b0)
+ itercount[0:6] <- (0b0 || itercount[0:5])
+ # set up template in SVSHAPE0, then copy to 1-3
+ # set up FRB and FRS
+ SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
+ SVSHAPE0[6:11] <- 0b000010 # DCT Outer Butterfly mode
+ SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
+ # copy
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ # for FRA and FRT
+ SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
+ # set schedule up for DCT COS table generation
+ if (SVRM = 0b0101) then
+ # calculate O(N log2 N)
+ vlen[0:6] <- [0] * 7
+ itercount[0:6] <- (0b00 || SVxd) + 0b0000001
+ itercount[0:6] <- (0b0 || itercount[0:5])
+ n <- [0] * 3
+ do while n < 5
+ if SVxd[4-n] = 0 then
+ leave
+ n <- n + 1
+ vlen[0:6] <- vlen + itercount
+ itercount[0:6] <- (0b0 || itercount[0:5])
+ # set up template in SVSHAPE0, then copy to 1-3
+ # set up FRB and FRS
+ SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
+ SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
+ SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
+ # copy
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ # for cos coefficient
+ SVSHAPE1[28:29] <- 0b10 # ci schedule
+ SVSHAPE2[28:29] <- 0b11 # size schedule
+ # set VL, MVL and Vertical-First
+ SVSTATE[0:6] <- vlen
+ SVSTATE[7:13] <- vlen
+ SVSTATE[63] <- vf
Special Registers Altered: