openpower/isa/simplev.mdwn

   1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
   2 <!-- These are not described in book 1 -->
   3
   4 # svstep
   5
   6 SVL-Form
   7
   8 * svstep RT,SVi,vf (Rc=0)
   9 * svstep. RT,SVi,vf (Rc=1)
  10
  11 Pseudo-code:
  12
  13     if SVi[3:4] = 0b11 then
  14         # SVi[3:4]=0b11: copy SVi[5:6] into SVSTATE[53:54] (original note: "subvl, pack and unpack"; only 2 bits written - TODO confirm names)
  15         SVSTATE[53] <- SVi[5]
  16         SVSTATE[54] <- SVi[6]
  17         RT <- [0]*62 || SVSTATE[53:54]  # return the two bits just written, zero-extended to 64 bits
  18     else
  19         step <- SVSTATE_NEXT(SVi, vf)  # helper not defined in this section
  20         RT <- [0]*57 || step  # zero-extend step into RT (presumably step is 7 bits - confirm)
  21
  22 Special Registers Altered:
  23
  24     CR0                     (if Rc=1)
  25
  26 # setvl
  27
  28 SVL-Form
  29
  30 * setvl RT,RA,SVi,vf,vs,ms (Rc=0)
  31 * setvl. RT,RA,SVi,vf,vs,ms (Rc=1)
  32
  33 Pseudo-code:
  34
  35     overflow <- 0b0  # set on saturation/clamping below; not read in this pseudo-code (presumably feeds CR0 when Rc=1 - confirm)
  36     VLimm <- SVi + 1  # the immediate is encoded as (length - 1)
  37     # set or get MVL: ms=1 takes it from the immediate, otherwise keep SVSTATE's
  38     if ms = 1 then MVL <- VLimm[0:6]
  39     else           MVL <- SVSTATE[0:6]
  40     # set or get VL: vs=0 keeps SVSTATE's VL; else RA (if RA field != 0), else immediate (if RT field = 0), else CTR
  41     if vs = 0                then VL <- SVSTATE[7:13]
  42     else if _RA != 0         then
  43         if (RA) >u 0b1111111 then
  44             VL <- 0b1111111
  45             overflow <- 0b1
  46         else                      VL <- (RA)[57:63]
  47     else if _RT = 0          then VL <- VLimm[0:6]
  48     else if CTR >u 0b1111111 then
  49         VL <- 0b1111111
  50         overflow <- 0b1
  51     else                          VL <- CTR[57:63]
  52     # limit VL to within MVL (clamping also flags overflow)
  53     if VL >u MVL then
  54         overflow <- 0b1
  55         VL <- MVL
  56     SVSTATE[0:6] <- MVL  # write back MVL
  57     SVSTATE[7:13] <- VL  # write back VL
  58     if _RT != 0 then
  59        GPR(_RT) <- [0]*57 || VL  # return the final VL, zero-extended to 64 bits
  60     if ((¬vs) & ¬(ms)) = 0 then
  61         # set requested Vertical-First mode, clear persist (taken when vs=1 or ms=1)
  62         SVSTATE[63] <- vf
  63         SVSTATE[62] <- 0b0
  64
  65 Special Registers Altered:
  66
  67     CR0                     (if Rc=1)
  68
  69 # svremap
  70
  71 SVRM-Form
  72
  73 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
  74
  75 Pseudo-code:
  76
  77     # 2-bit SVSHAPE0-3 index for each of the registers RA RB RC RT EA/FRS
  78     SVSTATE[32:33] <- mi0
  79     SVSTATE[34:35] <- mi1
  80     SVSTATE[36:37] <- mi2
  81     SVSTATE[38:39] <- mo0
  82     SVSTATE[40:41] <- mo1
  83     # 5-bit enable mask for RA RB RC RT EA/FRS
  84     SVSTATE[42:46] <- SVme
  85     # persistence bit (applies to more than one instruction)
  86     SVSTATE[62] <- pst
  87
  88 Special Registers Altered:
  89
  90     None
  91
  92 # svshape
  93
  94 SVM-Form
  95
  96 * svshape SVxd,SVyd,SVzd,SVrm,vf
  97
  98 Pseudo-code:
  99
 100     # for convenience, VL to be calculated (per SVrm mode, into vlen) and stored in SVSTATE at the end
 101     vlen <- [0] * 7
 102     mscale[0:5] <- 0b000001 # for scaling MAXVL (MAXVL = vlen * mscale); default scale is 1
 103     itercount[0:6] <- [0] * 7
 104     SVSTATE[0:31] <- [0] * 32
 105     # only overwrite REMAP (SVSTATE[32:46]) if "persistence" (SVSTATE[62]) is zero
 106     if (SVSTATE[62] = 0b0) then
 107         SVSTATE[32:33] <- 0b00
 108         SVSTATE[34:35] <- 0b00
 109         SVSTATE[36:37] <- 0b00
 110         SVSTATE[38:39] <- 0b00
 111         SVSTATE[40:41] <- 0b00
 112         SVSTATE[42:46] <- 0b00000
 113         SVSTATE[62] <- 0b0  # no-op: the guard above already requires bit 62 to be 0
 114         SVSTATE[63] <- 0b0  # bit 63 is written again from vf at the end of this pseudo-code
 115     # clear out all SVSHAPEs (unconditionally, regardless of persistence)
 116     SVSHAPE0[0:31] <- [0] * 32
 117     SVSHAPE1[0:31] <- [0] * 32
 118     SVSHAPE2[0:31] <- [0] * 32
 119     SVSHAPE3[0:31] <- [0] * 32
 120     # set schedule up for multiply (SVrm = 0)
 121     if (SVrm = 0b0000) then
 122         # VL in Matrix Multiply is xd*yd*zd (each SVxd/SVyd/SVzd field holds size-1)
 123         xd <- (0b00 || SVxd) + 1
 124         yd <- (0b00 || SVyd) + 1
 125         zd <- (0b00 || SVzd) + 1
 126         n <- xd * yd * zd
 127         vlen[0:6] <- n[14:20]  # keep 7 bits of the product (the low 7 if n is 21 bits wide - confirm)
 128         # set up template in SVSHAPE0, then copy to 1-3
 129         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 130         SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
 131         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
 132         SVSHAPE0[28:29] <- 0b11           # skip z
 133         # copy the template (SVSHAPE3 keeps it unmodified)
 134         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 135         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 136         SVSHAPE3[0:31] <- SVSHAPE0[0:31]
 137         # set up FRA
 138         SVSHAPE1[18:20] <- 0b001          # permute x,z,y
 139         SVSHAPE1[28:29] <- 0b01           # skip z - NOTE(review): 0b11 is also labelled "skip z" above; confirm
 140         # FRC
 141         SVSHAPE2[18:20] <- 0b001          # permute x,z,y
 142         SVSHAPE2[28:29] <- 0b11           # skip y - NOTE(review): same value is labelled "skip z" for SVSHAPE0; confirm
 143     # set schedule up for FFT butterfly (SVrm = 1)
 144     if (SVrm = 0b0001) then
 145         # calculate O(N log2 N): n counts consecutive 1 bits of SVxd starting at SVxd[4], at most 5
 146         n <- [0] * 3
 147         do while n < 5
 148            if SVxd[4-n] = 0 then
 149                leave
 150            n <- n + 1
 151         n <- ((0b0 || SVxd) + 1) * n  # n is reused: (SVxd+1) times the bit count
 152         vlen[0:6] <- n[1:7]
 153         # set up template in SVSHAPE0, then copy to 1-2 (SVSHAPE3 stays cleared)
 154         # for FRA and FRT
 155         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 156         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D FFT)
 157         mscale <- (0b0 || SVzd) + 1  # MAXVL is scaled by (SVzd + 1)
 158         SVSHAPE0[30:31] <- 0b01          # Butterfly mode
 159         # copy
 160         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 161         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 162         # set up FRB and FRS
 163         SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
 164         # FRC (coefficients)
 165         SVSHAPE2[28:29] <- 0b10           # k schedule
 166     # set schedule up for (i)DCT Inner butterfly
 167     # SVrm Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
 168     # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
 169     if ((SVrm = 0b0010) | (SVrm = 0b0100) |
 170         (SVrm = 0b1010) | (SVrm = 0b1100)) then
 171         # calculate O(N log2 N): n counts consecutive 1 bits of SVxd starting at SVxd[4], at most 5
 172         n <- [0] * 3
 173         do while n < 5
 174            if SVxd[4-n] = 0 then
 175                leave
 176            n <- n + 1
 177         n <- ((0b0 || SVxd) + 1) * n  # n is reused: (SVxd+1) times the bit count
 178         vlen[0:6] <- n[1:7]
 179         # set up template in SVSHAPE0, then copy to 1-3 (SVSHAPE3 only for modes 2 and 10)
 180         # set up FRB and FRS
 181         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 182         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 183         mscale <- (0b0 || SVzd) + 1  # MAXVL is scaled by (SVzd + 1)
 184         if (SVrm = 0b1010) | (SVrm = 0b1100) then
 185             SVSHAPE0[30:31] <- 0b11          # iDCT mode
 186             SVSHAPE0[18:20] <- 0b011         # iDCT Inner Butterfly sub-mode
 187         else
 188             SVSHAPE0[30:31] <- 0b01          # DCT mode
 189             SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
 190             SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
 191         if (SVrm = 0b1100) | (SVrm = 0b0100) then
 192             SVSHAPE0[6:11] <- 0b000011       # (i)DCT Inner Butterfly mode 4
 193         else
 194             SVSHAPE0[6:11] <- 0b000001       # (i)DCT Inner Butterfly mode 2
 195         # copy (SVSHAPE3 is left cleared for modes 4 and 12)
 196         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 197         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 198         if (SVrm != 0b0100) & (SVrm != 0b1100) then
 199             SVSHAPE3[0:31] <- SVSHAPE0[0:31]
 200         # for FRA and FRT (applied after the copy, so it affects SVSHAPE0 only)
 201         SVSHAPE0[28:29] <- 0b01           # j+halfstep schedule
 202         # for cos coefficient
 203         SVSHAPE2[28:29] <- 0b10           # ci (k for mode 4) schedule
 204         SVSHAPE2[12:17] <- 0b000000       # reset costable "striding" to 1
 205         if (SVrm != 0b0100) & (SVrm != 0b1100) then
 206             SVSHAPE3[28:29] <- 0b11           # size schedule
 207     # set schedule up for (i)DCT Outer butterfly (SVrm = 3, or 11 for iDCT)
 208     if (SVrm = 0b0011) | (SVrm = 0b1011) then
 209         # calculate O(N log2 N) number of outer butterfly overlapping adds
 210         vlen[0:6] <- [0] * 7
 211         n <- 0b000
 212         size <- 0b0000001
 213         itercount[0:6] <- (0b00 || SVxd) + 0b0000001  # start from SVxd + 1
 214         itercount[0:6] <- (0b0 || itercount[0:5])  # drop the last bit, shifting the rest down one place
 215         do while n < 5
 216            if SVxd[4-n] = 0 then
 217                leave
 218            n <- n + 1
 219            count <- (itercount - 0b0000001) * size  # this iteration's contribution to VL
 220            vlen[0:6] <- vlen + count[7:13]  # accumulate (the low 7 bits if count is 14 bits wide - confirm)
 221            size[0:6] <- (size[1:6] || 0b0)  # drop the first bit and append a zero
 222            itercount[0:6] <- (0b0 || itercount[0:5])  # same one-place shift as before the loop
 223         # set up template in SVSHAPE0, then copy to 1-2 (SVSHAPE3 stays cleared)
 224         # set up FRB and FRS
 225         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 226         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 227         mscale <- (0b0 || SVzd) + 1  # MAXVL is scaled by (SVzd + 1)
 228         if (SVrm = 0b1011) then
 229             SVSHAPE0[30:31] <- 0b11      # iDCT mode
 230             SVSHAPE0[18:20] <- 0b011     # iDCT Outer Butterfly sub-mode
 231             SVSHAPE0[21:23] <- 0b101     # "inverse" on outer and inner loop
 232         else
 233             SVSHAPE0[30:31] <- 0b01      # DCT mode
 234             SVSHAPE0[18:20] <- 0b100     # DCT Outer Butterfly sub-mode
 235         SVSHAPE0[6:11] <- 0b000010       # DCT Butterfly mode
 236         # copy
 237         SVSHAPE1[0:31] <- SVSHAPE0[0:31] # j+halfstep schedule
 238         SVSHAPE2[0:31] <- SVSHAPE0[0:31] # costable coefficients
 239         # for FRA and FRT
 240         SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
 241         # reset costable "striding" to 1
 242         SVSHAPE2[12:17] <- 0b000000
 243     # set schedule up for DCT COS table generation (SVrm = 5 or 13)
 244     if (SVrm = 0b0101) | (SVrm = 0b1101) then
 245         # calculate VL: sum of the successively shifted-down values of (SVxd + 1), one term per loop pass
 246         vlen[0:6] <- [0] * 7
 247         itercount[0:6] <- (0b00 || SVxd) + 0b0000001  # start from SVxd + 1
 248         itercount[0:6] <- (0b0 || itercount[0:5])  # drop the last bit, shifting the rest down one place
 249         n <- [0] * 3
 250         do while n < 5
 251            if SVxd[4-n] = 0 then
 252                leave
 253            n <- n + 1
 254            vlen[0:6] <- vlen + itercount
 255            itercount[0:6] <- (0b0 || itercount[0:5])  # same one-place shift as before the loop
 256         # set up template in SVSHAPE0, then copy to 1-2 (SVSHAPE3 stays cleared)
 257         # set up FRB and FRS
 258         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 259         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 260         mscale <- (0b0 || SVzd) + 1  # MAXVL is scaled by (SVzd + 1)
 261         SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
 262         SVSHAPE0[6:11] <- 0b000100       # DCT Inner Butterfly COS-gen mode
 263         if (SVrm = 0b0101) then
 264             SVSHAPE0[21:23] <- 0b001     # "inverse" on outer loop for DCT
 265         # copy
 266         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 267         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 268         # for cos coefficient
 269         SVSHAPE1[28:29] <- 0b10           # ci schedule
 270         SVSHAPE2[28:29] <- 0b11           # size schedule
 271     # set schedule up for iDCT / DCT inverse of half-swapped ordering (SVrm = 6, 14 or 15)
 272     if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
 273         vlen[0:6] <- (0b00 || SVxd) + 0b0000001  # VL = SVxd + 1
 274         # set up template in SVSHAPE0 (no copy: SVSHAPE1-3 stay cleared)
 275         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 276         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 277         mscale <- (0b0 || SVzd) + 1  # MAXVL is scaled by (SVzd + 1)
 278         if (SVrm = 0b1110) then
 279             SVSHAPE0[18:20] <- 0b001     # DCT opposite half-swap
 280         if (SVrm = 0b1111) then
 281             SVSHAPE0[30:31] <- 0b01          # FFT mode
 282         else
 283             SVSHAPE0[30:31] <- 0b11          # DCT mode - NOTE(review): 0b11 is labelled "iDCT mode" in the butterfly sections; confirm
 284         SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
 285     # set schedule up for parallel reduction (SVrm = 7)
 286     if (SVrm = 0b0111) then
 287         # calculate the total number of operations (brute-force): i counts inner-loop passes
 288         vlen[0:6] <- [0] * 7
 289         itercount[0:6] <- (0b00 || SVxd) + 0b0000001  # SVxd + 1
 290         step[0:6] <- 0b0000001
 291         i[0:6] <- 0b0000000
 292         do while step <u itercount
 293             newstep <- step[1:6] || 0b0
 294             j[0:6] <- 0b0000000
 295             do while (j+step <u itercount)
 296                 j <- j + newstep
 297                 i <- i + 1
 298             step <- newstep
 299         # VL in Parallel-Reduce is the number of operations
 300         vlen[0:6] <- i
 301         # set up template in SVSHAPE0, then copy to SVSHAPE1 (only two shapes are needed)
 302         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 303         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding"
 304         mscale <- (0b0 || SVzd) + 1  # MAXVL is scaled by (SVzd + 1)
 305         SVSHAPE0[30:31] <- 0b10          # parallel reduce submode
 306         # copy
 307         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 308         # set up right operand (left operand 28:29 is zero)
 309         SVSHAPE1[28:29] <- 0b01           # right operand
 310     # set VL, MVL and Vertical-First (runs for every SVrm value)
 311     m[0:12] <- vlen * mscale
 312     maxvl[0:6] <- m[6:12]
 313     SVSTATE[0:6] <- maxvl  # MAXVL
 314     SVSTATE[7:13] <- vlen  # VL
 315     SVSTATE[63] <- vf
 316
 317 Special Registers Altered:
 318
 319     None
 320
 321 # svindex
 322
 323 SVI-Form
 324
 325 * svindex SVG,rmm,SVd,ew,SVyx,mm,sk
 326
 327 Pseudo-code:
 328
 329     # based on nearest MAXVL compute other dimension: d is the smallest count with d*dim >= MVL
 330     MVL <- SVSTATE[0:6]
 331     d <- [0] * 6
 332     dim <- SVd+1
 333     do while d*dim <u ([0]*4 || MVL)
 334        d <- d + 1
 335     # set up template, then copy once location identified
 336     shape <- [0]*32
 337     shape[30:31] <- 0b00            # mode
 338     if SVyx = 0 then
 339         shape[18:20] <- 0b110       # indexed xd/yd
 340         shape[0:5] <- (0b0 || SVd)  # xdim
 341         if sk = 0 then shape[6:11] <- 0 # ydim
 342         else           shape[6:11] <- 0b111111 # ydim max
 343     else
 344         shape[18:20] <- 0b111       # indexed yd/xd
 345         if sk = 1 then shape[6:11] <- 0 # ydim
 346         else           shape[6:11] <- d-1 # ydim max
 347         shape[0:5] <- (0b0 || SVd) # ydim - NOTE(review): this same field is labelled xdim in the SVyx=0 branch; confirm
 348     shape[12:17] <- (0b0 || SVG)        # SVGPR
 349     shape[28:29] <- ew                  # element-width override
 350     shape[21] <- sk                     # skip 1st dimension
 351     # select the mode for updating SVSHAPEs: mm=0 rebuilds everything, mm=1 updates one shape
 352     SVSTATE[62] <- mm # set or clear persistence
 353     if mm = 0 then
 354         # clear out all SVSHAPEs first
 355         SVSHAPE0[0:31] <- [0] * 32
 356         SVSHAPE1[0:31] <- [0] * 32
 357         SVSHAPE2[0:31] <- [0] * 32
 358         SVSHAPE3[0:31] <- [0] * 32
 359         SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
 360         SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
 361         idx <- 0
 362         for bit = 0 to 4
 363             if rmm[4-bit] then
 364                 # activate requested shape: rmm is scanned from rmm[4] to rmm[0], one SVSHAPE per set bit
 365                 if idx = 0 then SVSHAPE0 <- shape
 366                 if idx = 1 then SVSHAPE1 <- shape
 367                 if idx = 2 then SVSHAPE2 <- shape
 368                 if idx = 3 then SVSHAPE3 <- shape
 369                 SVSTATE[bit*2+32:bit*2+33] <- idx
 370                 # increment shape index, modulo 4 (a fifth set bit wraps back to SVSHAPE0)
 371                 if idx = 3 then idx <- 0
 372                 else            idx <- idx + 1
 373     else
 374         # refined SVSHAPE/REMAP update mode: only the one selected SVSHAPE and REMAP entry are written
 375         bit <- rmm[0:2]  # selects the REMAP entry; NOTE(review): 3-bit value, 5-7 are not range-checked here
 376         idx <- rmm[3:4]  # selects which SVSHAPE receives the template
 377         if idx = 0 then SVSHAPE0 <- shape
 378         if idx = 1 then SVSHAPE1 <- shape
 379         if idx = 2 then SVSHAPE2 <- shape
 380         if idx = 3 then SVSHAPE3 <- shape
 381         SVSTATE[bit*2+32:bit*2+33] <- idx
 382         SVSTATE[46-bit] <- 1
 383
 384 Special Registers Altered:
 385
 386     None
 387
 388 # svshape2
 389
 390 SVM2-Form
 391
 392 * svshape2 SVo,SVyx,rmm,SVd,sk,mm
 393
 394 Pseudo-code:
 395
 396     # based on nearest MAXVL compute other dimension: d is the smallest count with d*dim >= MVL
 397     MVL <- SVSTATE[0:6]
 398     d <- [0] * 6
 399     dim <- SVd+1
 400     do while d*dim <u ([0]*4 || MVL)
 401        d <- d + 1
 402     # set up template, then copy once location identified
 403     shape <- [0]*32
 404     shape[30:31] <- 0b00            # mode
 405     shape[0:5] <- (0b0 || SVd)      # x/ydim
 406     if SVyx = 0 then
 407         shape[18:20] <- 0b000       # ordering xd/yd(/zd)
 408         if sk = 0 then shape[6:11] <- 0 # ydim
 409         else           shape[6:11] <- 0b111111 # ydim max
 410     else
 411         shape[18:20] <- 0b010       # ordering yd/xd(/zd)
 412         if sk = 1 then shape[6:11] <- 0 # ydim
 413         else           shape[6:11] <- d-1 # ydim max
 414     # offset (the prime purpose of this instruction)
 415     shape[24:27] <- SVo         # offset
 416     if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
 417     else           shape[28:29] <- 0b00 # no skipping
 418     # select the mode for updating SVSHAPEs: mm=0 rebuilds everything, mm=1 updates one shape
 419     SVSTATE[62] <- mm # set or clear persistence
 420     if mm = 0 then
 421         # clear out all SVSHAPEs first
 422         SVSHAPE0[0:31] <- [0] * 32
 423         SVSHAPE1[0:31] <- [0] * 32
 424         SVSHAPE2[0:31] <- [0] * 32
 425         SVSHAPE3[0:31] <- [0] * 32
 426         SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
 427         SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
 428         idx <- 0
 429         for bit = 0 to 4
 430             if rmm[4-bit] then
 431                 # activate requested shape: rmm is scanned from rmm[4] to rmm[0], one SVSHAPE per set bit
 432                 if idx = 0 then SVSHAPE0 <- shape
 433                 if idx = 1 then SVSHAPE1 <- shape
 434                 if idx = 2 then SVSHAPE2 <- shape
 435                 if idx = 3 then SVSHAPE3 <- shape
 436                 SVSTATE[bit*2+32:bit*2+33] <- idx
 437                 # increment shape index, modulo 4 (a fifth set bit wraps back to SVSHAPE0)
 438                 if idx = 3 then idx <- 0
 439                 else            idx <- idx + 1
 440     else
 441         # refined SVSHAPE/REMAP update mode: only the one selected SVSHAPE and REMAP entry are written
 442         bit <- rmm[0:2]  # selects the REMAP entry; NOTE(review): 3-bit value, 5-7 are not range-checked here
 443         idx <- rmm[3:4]  # selects which SVSHAPE receives the template
 444         if idx = 0 then SVSHAPE0 <- shape
 445         if idx = 1 then SVSHAPE1 <- shape
 446         if idx = 2 then SVSHAPE2 <- shape
 447         if idx = 3 then SVSHAPE3 <- shape
 448         SVSTATE[bit*2+32:bit*2+33] <- idx
 449         SVSTATE[46-bit] <- 1
 450
 451 Special Registers Altered:
 452
 453     None
 454