SVL-Form
-* svstep RT,SVi,vf
-* svstep. RT,SVi,vf
+* svstep RT,SVi,vf (Rc=0)
+* svstep. RT,SVi,vf (Rc=1)
Pseudo-code:
- step <- SVSTATE_NEXT(SVi, vf)
- RT <- [0]*57 || step
+ if SVi[3:4] = 0b11 then
+ # store pack and unpack in SVSTATE
+ SVSTATE[53] <- SVi[5]
+ SVSTATE[54] <- SVi[6]
+ RT <- [0]*62 || SVSTATE[53:54]
+ else
+ step <- SVSTATE_NEXT(SVi, vf)
+ RT <- [0]*57 || step
Special Registers Altered:
SVL-Form
-* setvl RT,RA,SVi,vf,vs,ms
-* setvl. RT,RA,SVi,vf,vs,ms
+* setvl RT,RA,SVi,vf,vs,ms (Rc=0)
+* setvl. RT,RA,SVi,vf,vs,ms (Rc=1)
Pseudo-code:
overflow <- 0b0
- if (vf & (¬vs) & ¬(ms)) = 1 then
- step <- SVSTATE_NEXT(SVi, 0b0)
- if _RT != 0 then
- GPR(_RT) <- [0]*57 || step
- else
- VLimm <- SVi + 1
- # set or get MVL
- if ms = 1 then MVL <- VLimm[0:6]
- else MVL <- SVSTATE[0:6]
- # set or get VL
- if vs = 0 then VL <- SVSTATE[7:13]
- else if _RA != 0 then
- if (RA) >u 0b1111111 then
- VL <- 0b1111111
- overflow <- 0b1
- else VL <- (RA)[57:63]
- else if _RA != 0 then VL <- (RA)[57:63]
- else if _RT = 0 then VL <- VLimm[0:6]
- else if CTR >u 0b1111111 then
+ VLimm <- SVi + 1
+ # set or get MVL
+ if ms = 1 then MVL <- VLimm[0:6]
+ else MVL <- SVSTATE[0:6]
+ # set or get VL
+ if vs = 0 then VL <- SVSTATE[7:13]
+ else if _RA != 0 then
+ if (RA) >u 0b1111111 then
VL <- 0b1111111
overflow <- 0b1
- else VL <- CTR[57:63]
- # limit VL to within MVL
- if VL >u MVL then
- overflow <- 0b1
- VL <- MVL
- SVSTATE[0:6] <- MVL
- SVSTATE[7:13] <- VL
- if _RT != 0 then
- GPR(_RT) <- [0]*57 || VL
- if ((¬vs) & ¬(ms)) = 0 then
- # set requested Vertical-First mode, clear persist
- SVSTATE[63] <- vf
- SVSTATE[62] <- 0b0
+ else VL <- (RA)[57:63]
+ else if _RT = 0 then VL <- VLimm[0:6]
+ else if CTR >u 0b1111111 then
+ VL <- 0b1111111
+ overflow <- 0b1
+ else VL <- CTR[57:63]
+ # limit VL to within MVL
+ if VL >u MVL then
+ overflow <- 0b1
+ VL <- MVL
+ SVSTATE[0:6] <- MVL
+ SVSTATE[7:13] <- VL
+ if _RT != 0 then
+ GPR(_RT) <- [0]*57 || VL
+ # Setting MAXVL (ms=1) acts as a static "state-reset" of the mode bits.
+ if ms = 1 then
+ SVSTATE[63] <- vf # set Vertical-First mode
+ SVSTATE[62] <- 0b0 # clear persist bit
Special Registers Altered:
# for convenience, VL to be calculated and stored in SVSTATE
vlen <- [0] * 7
+ mscale[0:5] <- 0b000001 # for scaling MAXVL
itercount[0:6] <- [0] * 7
SVSTATE[0:31] <- [0] * 32
# only overwrite REMAP if "persistence" is zero
# set schedule up for multiply
if (SVrm = 0b0000) then
# VL in Matrix Multiply is xd*yd*zd
- n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
+ xd <- (0b00 || SVxd) + 1
+ yd <- (0b00 || SVyd) + 1
+ zd <- (0b00 || SVzd) + 1
+ n <- xd * yd * zd
vlen[0:6] <- n[14:20]
# set up template in SVSHAPE0, then copy to 1-3
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
# set up template in SVSHAPE0, then copy to 1-3
# for FRA and FRT
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D FFT)
+ mscale <- (0b0 || SVzd) + 1
SVSHAPE0[30:31] <- 0b01 # Butterfly mode
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
# FRC (coefficients)
SVSHAPE2[28:29] <- 0b10 # k schedule
# set schedule up for (i)DCT Inner butterfly
- # SVrm Mode 2 (Mode 6 for iDCT) is for pre-calculated coefficients,
# SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
- if ((SVrm = 0b0010) | (SVrm = 0b0100) |
- (SVrm = 0b1010) | (SVrm = 0b1100)) then
+ if ((SVrm = 0b0100) |
+ (SVrm = 0b1100)) then
# calculate O(N log2 N)
n <- [0] * 3
do while n < 5
# set up template in SVSHAPE0, then copy to 1-3
# set up FRB and FRS
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
- if (SVrm = 0b1010) | (SVrm = 0b1100) then
+ SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
+ mscale <- (0b0 || SVzd) + 1
+ if (SVrm = 0b1100) then
SVSHAPE0[30:31] <- 0b11 # iDCT mode
SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
else
SVSHAPE0[30:31] <- 0b01 # DCT mode
SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
- if (SVrm = 0b1100) | (SVrm = 0b0100) then
- SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
- else
- SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2
+ SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
SVSHAPE2[0:31] <- SVSHAPE0[0:31]
SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
# for cos coefficient
SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
+ SVSHAPE2[12:17] <- 0b000000 # reset costable "striding" to 1
if (SVrm != 0b0100) & (SVrm != 0b1100) then
SVSHAPE3[28:29] <- 0b11 # size schedule
# set schedule up for (i)DCT Outer butterfly
# set up template in SVSHAPE0, then copy to 1-3
# set up FRB and FRS
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
+ mscale <- (0b0 || SVzd) + 1
if (SVrm = 0b1011) then
SVSHAPE0[30:31] <- 0b11 # iDCT mode
SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
# copy
- SVSHAPE1[0:31] <- SVSHAPE0[0:31]
- SVSHAPE2[0:31] <- SVSHAPE0[0:31]
+ SVSHAPE1[0:31] <- SVSHAPE0[0:31] # j+halfstep schedule
+ SVSHAPE2[0:31] <- SVSHAPE0[0:31] # costable coefficients
# for FRA and FRT
SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
+ # reset costable "striding" to 1
+ SVSHAPE2[12:17] <- 0b000000
# set schedule up for DCT COS table generation
if (SVrm = 0b0101) | (SVrm = 0b1101) then
# calculate O(N log2 N)
# set up template in SVSHAPE0, then copy to 1-3
# set up FRB and FRS
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
+ mscale <- (0b0 || SVzd) + 1
SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
if (SVrm = 0b0101) then
vlen[0:6] <- (0b00 || SVxd) + 0b0000001
# set up template in SVSHAPE0
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
+ SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
+ mscale <- (0b0 || SVzd) + 1
if (SVrm = 0b1110) then
SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
if (SVrm = 0b1111) then
else
SVSHAPE0[30:31] <- 0b11 # DCT mode
SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
- # set schedule up for parallel reduction
+ # set schedule up for parallel reduction or prefix-sum
if (SVrm = 0b0111) then
+ # is scan/prefix-sum
+ is_scan <- SVyd = 2
# calculate the total number of operations (brute-force)
vlen[0:6] <- [0] * 7
itercount[0:6] <- (0b00 || SVxd) + 0b0000001
- step[0:6] <- 0b0000001
- i[0:6] <- 0b0000000
- do while step <u itercount
- newstep <- step[1:6] || 0b0
- j[0:6] <- 0b0000000
- do while (j+step <u itercount)
- j <- j + newstep
- i <- i + 1
- step <- newstep
- # VL in Parallel-Reduce is the number of operations
- vlen[0:6] <- i
+ if is_scan then
+ # prefix sum algorithm with operations replaced with
+ # incrementing vlen
+ dist <- 1
+ vlen[0:6] <- 0
+ do while dist <u itercount
+ start <- dist * 2 - 1
+ step <- dist * 2
+ i <- start
+ do while i <u itercount
+ vlen[0:6] <- vlen[0:6] + 1
+ i <- i + step
+ dist <- dist * 2
+ dist <- dist / 2
+ do while dist != 0
+ i <- dist * 3 - 1
+ do while i <u itercount
+ vlen[0:6] <- vlen[0:6] + 1
+ i <- i + dist * 2
+ dist <- dist / 2
+ else
+ step <- 0b0000001
+ i <- 0b0000000
+ do while step <u itercount
+ newstep <- step[1:6] || 0b0
+ j[0:6] <- 0b0000000
+ do while (j+step <u itercount)
+ j <- j + newstep
+ i <- i + 1
+ step <- newstep
+ # VL in Parallel-Reduce is the number of operations
+ vlen[0:6] <- i
# set up template in SVSHAPE0, then copy to 1. only 2 needed
SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
- SVSHAPE0[30:31] <- 0b10 # parallel reduce submode
+ SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding"
+ mscale <- (0b0 || SVzd) + 1
+ SVSHAPE0[30:31] <- 0b10 # parallel reduce/prefix submode
# copy
SVSHAPE1[0:31] <- SVSHAPE0[0:31]
- # set up right operand (left operand 28:29 is zero)
- SVSHAPE1[28:29] <- 0b01 # right operand
+ # set up submodes: parallel or prefix
+ SVSHAPE0[28:29] <- 0b00 # left operand
+ SVSHAPE1[28:29] <- 0b01 # right operand
+ if is_scan then
+ SVSHAPE0[28:29] <- 0b10 # left operand
+ SVSHAPE1[28:29] <- 0b11 # right operand
# set VL, MVL and Vertical-First
- SVSTATE[0:6] <- vlen
- SVSTATE[7:13] <- vlen
+ m[0:12] <- vlen * mscale
+ maxvl[0:6] <- m[6:12]
+ SVSTATE[0:6] <- maxvl # MAXVL
+ SVSTATE[7:13] <- vlen # VL
SVSTATE[63] <- vf
Special Registers Altered:
shape[0:5] <- (0b0 || SVd) # ydim
shape[12:17] <- (0b0 || SVG) # SVGPR
shape[28:29] <- ew # element-width override
- if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
- else shape[28:29] <- 0b00 # no skipping
+ shape[21] <- sk # skip 1st dimension
# select the mode for updating SVSHAPEs
SVSTATE[62] <- mm # set or clear persistence
if mm = 0 then