# svstep

SVL-Form

* svstep RT,SVi,vf
* svstep. RT,SVi,vf

Pseudo-code:

    # obtain the step value from SVSTATE_NEXT, then place it in RT
    # prefixed with 57 zero bits
    step <- SVSTATE_NEXT(SVi, vf)
    RT <- [0]*57 || step

Special Registers Altered:

    CR0                     (if Rc=1)

# setvl

SVL-Form

* setvl RT,RA,SVi,vf,vs,ms
* setvl. RT,RA,SVi,vf,vs,ms

Pseudo-code:

    # vf set with neither vs nor ms: only step, VL and MVL are untouched
    if (vf & (¬vs) & ¬(ms)) = 1 then
        step <- SVSTATE_NEXT(SVi, 0b0)
        if _RT != 0b00000 then
            GPR(_RT) <- [0]*57 || step
    else
        # the immediate is stored off-by-one: VLimm is SVi plus one
        VLimm <- SVi + 1
        # vs selects a new VL (from RA if non-zero, else the immediate),
        # otherwise the current VL in SVSTATE is kept
        if vs = 1 then
            if _RA != 0 then
                VL <- (RA|0)[57:63]
            else
                VL <- VLimm[0:6]
        else
            VL <- SVSTATE[7:13]
        # ms selects a new MVL from the immediate,
        # otherwise the current MVL in SVSTATE is kept
        if ms = 1 then
            MVL <- VLimm[0:6]
        else
            MVL <- SVSTATE[0:6]
        # clamp VL so that it never exceeds MVL ("<-" assigns, "=" compares)
        if VL > MVL then
            VL <- MVL
        SVSTATE[0:6] <- MVL
        SVSTATE[7:13] <- VL
        if _RT != 0b00000 then
            GPR(_RT) <- [0]*57 || VL
        # set requested Vertical-First mode, clear persist
        SVSTATE[63] <- vf
        SVSTATE[62] <- 0b0

Special Registers Altered:

    CR0                     (if Rc=1)

# svremap

SVRM-Form

* svremap SVme,mi0,mi1,mi2,mo0,mo1,pst

Pseudo-code:

    # registers RA RB RC RT EA/FRS SVSHAPE0-3 indices
    SVSTATE[32:33] <- mi0
    SVSTATE[34:35] <- mi1
    SVSTATE[36:37] <- mi2
    SVSTATE[38:39] <- mo0
    SVSTATE[40:41] <- mo1
    # enable bit for RA RB RC RT EA/FRS
    SVSTATE[42:46] <- SVme
    # persistence bit (applies to more than one instruction)
    SVSTATE[62] <- pst

Special Registers Altered:

    None

# svshape

SVM-Form

* svshape SVxd,SVyd,SVzd,SVRM,vf

Pseudo-code:

    # for convenience, VL to be calculated and stored in SVSTATE
    vlen <- [0] * 7
    itercount[0:6] <- [0] * 7
    SVSTATE[0:31] <- [0] * 32
    # only overwrite REMAP if "persistence" is zero
    if (SVSTATE[62] = 0b0) then
        SVSTATE[32:33] <- 0b00
        SVSTATE[34:35] <- 0b00
        SVSTATE[36:37] <- 0b00
        SVSTATE[38:39] <- 0b00
        SVSTATE[40:41] <- 0b00
        SVSTATE[42:46] <- 0b00000
        SVSTATE[62] <- 0b0
        SVSTATE[63] <- 0b0
    # clear out all SVSHAPEs
    SVSHAPE0[0:31] <- [0] * 32
    SVSHAPE1[0:31] <- [0] * 32
    SVSHAPE2[0:31] <- [0] * 32
    SVSHAPE3[0:31] <- [0] * 32
    # set schedule up for multiply
    if (SVRM = 0b0000) then
        # VL in Matrix Multiply is xd*yd*zd
        n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
        vlen[0:6] <- n[14:20]
        # set up template in SVSHAPE0, then copy to 1-3
        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
        SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
        SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
        SVSHAPE0[28:29] <- 0b11           # skip z
        # copy
        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
        SVSHAPE3[0:31] <- SVSHAPE0[0:31]
        # set up FRA
        SVSHAPE1[18:20] <- 0b001          # permute x,z,y
        SVSHAPE1[28:29] <- 0b01           # skip z
        # FRC
        SVSHAPE2[18:20] <- 0b001          # permute x,z,y
        SVSHAPE2[28:29] <- 0b11           # skip y
    # set schedule up for FFT butterfly
    if (SVRM = 0b0001) then
        # calculate O(N log2 N)
        n <- [0] * 3
        do while n < 5
            if SVxd[4-n] = 0 then
                leave
            n <- n + 1
        n <- ((0b0 || SVxd) + 1) * n
        vlen[0:6] <- n[1:7]
        # set up template in SVSHAPE0, then copy to 1-3
        # for FRA and FRT
        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
        SVSHAPE0[30:31] <- 0b01          # Butterfly mode
        # copy
        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
        # set up FRB and FRS
        SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
        # FRC (coefficients)
        SVSHAPE2[28:29] <- 0b10           # k schedule
    # set schedule up for (i)DCT Inner butterfly
    # SVRM Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
    # SVRM Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
    if ((SVRM = 0b0010) | (SVRM = 0b0100) |
        (SVRM = 0b1010) | (SVRM = 0b1100)) then
        # calculate O(N log2 N)
        n <- [0] * 3
        do while n < 5
            if SVxd[4-n] = 0 then
                leave
            n <- n + 1
        n <- ((0b0 || SVxd) + 1) * n
        vlen[0:6] <- n[1:7]
        # set up template in SVSHAPE0, then copy to 1-3
        # set up FRB and FRS
        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
        if (SVRM = 0b1010) | (SVRM = 0b1100) then
            SVSHAPE0[30:31] <- 0b11          # iDCT mode
        else
            SVSHAPE0[30:31] <- 0b01          # DCT mode
        if (SVRM = 0b1100) | (SVRM = 0b0100) then
            SVSHAPE0[6:11] <- 0b000011       # (i)DCT Inner Butterfly mode 4
        else
            SVSHAPE0[6:11] <- 0b000001       # (i)DCT Inner Butterfly mode 2
        SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
        SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
        # copy
        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
        # NOTE(review): only 0b0100 is excluded here and below, not 0b1100
        # (which is also treated as mode 4 above) - confirm this is intended
        if (SVRM != 0b0100) then
            SVSHAPE3[0:31] <- SVSHAPE0[0:31]
        # for FRA and FRT
        SVSHAPE0[28:29] <- 0b01           # j+halfstep schedule
        # for cos coefficient
        SVSHAPE2[28:29] <- 0b10           # ci (k for mode 4) schedule
        if (SVRM != 0b0100) then
            SVSHAPE3[28:29] <- 0b11           # size schedule
    # set schedule up for (i)DCT Outer butterfly
    if (SVRM = 0b0011) | (SVRM = 0b1011) then
        # calculate O(N log2 N) number of outer butterfly overlapping adds
        vlen[0:6] <- [0] * 7
        n <- 0b000
        size <- 0b0000001
        itercount[0:6] <- (0b00 || SVxd) + 0b0000001
        itercount[0:6] <- (0b0 || itercount[0:5])
        do while n < 5
            if SVxd[4-n] = 0 then
                leave
            n <- n + 1
            count <- (itercount - 0b0000001) * size
            vlen[0:6] <- vlen + count[7:13]
            size[0:6] <- (size[1:6] || 0b0)
            itercount[0:6] <- (0b0 || itercount[0:5])
        # set up template in SVSHAPE0, then copy to 1-3
        # set up FRB and FRS
        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
        if (SVRM = 0b1011) then
            SVSHAPE0[30:31] <- 0b11      # iDCT mode
            SVSHAPE0[18:20] <- 0b011     # iDCT Outer Butterfly sub-mode
            SVSHAPE0[21:23] <- 0b101     # "inverse" on outer and inner loop
        else
            SVSHAPE0[30:31] <- 0b01      # DCT mode
            SVSHAPE0[18:20] <- 0b100     # DCT Outer Butterfly sub-mode
        SVSHAPE0[6:11] <- 0b000010       # DCT Butterfly mode
        # copy
        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
        # for FRA and FRT
        SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
    # set schedule up for DCT COS table generation
    if (SVRM = 0b0101) then
        # calculate O(N log2 N)
        vlen[0:6] <- [0] * 7
        itercount[0:6] <- (0b00 || SVxd) + 0b0000001
        itercount[0:6] <- (0b0 || itercount[0:5])
        n <- [0] * 3
        do while n < 5
            if SVxd[4-n] = 0 then
                leave
            n <- n + 1
            vlen[0:6] <- vlen + itercount
            itercount[0:6] <- (0b0 || itercount[0:5])
        # set up template in SVSHAPE0, then copy to 1-3
        # set up FRB and FRS
        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
        SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
        SVSHAPE0[6:11] <- 0b000100       # DCT Inner Butterfly COS-gen mode
        SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
        # copy
        SVSHAPE1[0:31] <- SVSHAPE0[0:31]
        SVSHAPE2[0:31] <- SVSHAPE0[0:31]
        # for cos coefficient
        SVSHAPE1[28:29] <- 0b10           # ci schedule
        SVSHAPE2[28:29] <- 0b11           # size schedule
    # set schedule up for DCT inverse of half-swapped ordering
    if (SVRM = 0b0110) then
        vlen[0:6] <- (0b00 || SVxd) + 0b0000001
        # set up template in SVSHAPE0
        SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
        SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
        SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
    # set VL, MVL and Vertical-First
    SVSTATE[0:6] <- vlen
    SVSTATE[7:13] <- vlen
    SVSTATE[63] <- vf

Special Registers Altered:

    None