openpower/isa/simplev.mdwn

   1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
   2 <!-- These are not described in book 1 -->
   3
   4 # svstep
   5
   6 SVL-Form
   7
   8 * svstep RT,SVi,vf (Rc=0)
   9 * svstep. RT,SVi,vf (Rc=1)
  10
  11 Pseudo-code:
  12
  13     if SVi[3:4] = 0b11 then
  14         # store pack and unpack in SVSTATE (no subvl write is visible here)
  15         SVSTATE[53] <- SVi[5]
  16         SVSTATE[54] <- SVi[6]
  17         RT <- [0]*62 || SVSTATE[53:54]  # return the two bits just written, zero-extended
  18     else
  19         step <- SVSTATE_NEXT(SVi, vf)  # every other SVi[3:4] value goes to SVSTATE_NEXT
  20         RT <- [0]*57 || step  # zero-extend step (presumably 7 bits wide - confirm)
  21
  22 Special Registers Altered:
  23
  24     CR0                     (if Rc=1)
  25
  26 # setvl
  27
  28 SVL-Form
  29
  30 * setvl RT,RA,SVi,vf,vs,ms (Rc=0)
  31 * setvl. RT,RA,SVi,vf,vs,ms (Rc=1)
  32
  33 Pseudo-code:
  34
  35     overflow <- 0b0  # raised whenever VL has to be clamped; not read again in this pseudo-code
  36     if (vf & (¬vs) & ¬(ms)) = 1 then
  37         step <- SVSTATE_NEXT(SVi, 0b0)  # vf without vs or ms: this branch writes no VL/MVL itself
  38         if _RT != 0 then
  39            GPR(_RT) <- [0]*57 || step
  40     else
  41         VLimm <- SVi + 1
  42         # set or get MVL
  43         if ms = 1 then MVL <- VLimm[0:6]
  44         else           MVL <- SVSTATE[0:6]
  45         # set or get VL, in priority order:
  46         # SVSTATE (vs=0), else RA (RA!=0), else VLimm (RT=0), else CTR
  47         if vs = 0                then VL <- SVSTATE[7:13]
  48         else if _RA != 0         then
  49             if (RA) >u 0b1111111 then
  50                 VL <- 0b1111111  # RA does not fit in 7 bits: saturate
  51                 overflow <- 0b1
  52             else                      VL <- (RA)[57:63]
  53         else if _RT = 0          then VL <- VLimm[0:6]
  54         else if CTR >u 0b1111111 then
  55             VL <- 0b1111111  # CTR does not fit in 7 bits: saturate
  56             overflow <- 0b1
  57         else                          VL <- CTR[57:63]
  58         # limit VL to within MVL
  59         if VL >u MVL then
  60             overflow <- 0b1
  61             VL <- MVL
  62         SVSTATE[0:6] <- MVL
  63         SVSTATE[7:13] <- VL
  64         if _RT != 0 then
  65            GPR(_RT) <- [0]*57 || VL
  66         if ((¬vs) & ¬(ms)) = 0 then
  67             # set requested Vertical-First mode, clear persist
  68             SVSTATE[63] <- vf
  69             SVSTATE[62] <- 0b0
  70
  70
  71 Special Registers Altered:
  72
  73     CR0                     (if Rc=1)
  74
  75 # svremap
  76
  77 SVRM-Form
  78
  79 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
  80
  81 Pseudo-code:
  82
  83     # 2-bit SVSHAPE0-3 index for each of the registers RA RB RC RT EA/FRS
  84     SVSTATE[32:33] <- mi0
  85     SVSTATE[34:35] <- mi1
  86     SVSTATE[36:37] <- mi2
  87     SVSTATE[38:39] <- mo0
  88     SVSTATE[40:41] <- mo1
  89     # one enable bit per register: RA RB RC RT EA/FRS
  90     SVSTATE[42:46] <- SVme
  91     # persistence bit (applies to more than one instruction)
  92     SVSTATE[62] <- pst
  93
  94 Special Registers Altered:
  95
  96     None
  97
  98 # svshape
  99
 100 SVM-Form
 101
 102 * svshape SVxd,SVyd,SVzd,SVrm,vf
 103
 104 Pseudo-code:
 105
 106     # for convenience, VL to be calculated and stored in SVSTATE
 107     vlen <- [0] * 7
 108     mscale[0:5] <- 0b000001 # MAXVL scale factor: 1 unless a mode below sets SVzd+1
 109     itercount[0:6] <- [0] * 7
 110     SVSTATE[0:31] <- [0] * 32  # wipes MAXVL (0:6) and VL (7:13) plus bits 14:31
 111     # only overwrite REMAP if "persistence" is zero
 112     if (SVSTATE[62] = 0b0) then
 113         SVSTATE[32:33] <- 0b00  # 32:41 are the SVSHAPE selectors written by svremap
 114         SVSTATE[34:35] <- 0b00
 115         SVSTATE[36:37] <- 0b00
 116         SVSTATE[38:39] <- 0b00
 117         SVSTATE[40:41] <- 0b00
 118         SVSTATE[42:46] <- 0b00000  # REMAP enable bits (SVme)
 119         SVSTATE[62] <- 0b0  # persistence bit, already zero on this path
 120         SVSTATE[63] <- 0b0  # Vertical-First bit; rewritten from vf at the end
 121     # clear out all SVSHAPEs
 122     SVSHAPE0[0:31] <- [0] * 32
 123     SVSHAPE1[0:31] <- [0] * 32
 124     SVSHAPE2[0:31] <- [0] * 32
 125     SVSHAPE3[0:31] <- [0] * 32
 126     # set schedule up for Matrix Multiply (SVrm = 0b0000)
 127     if (SVrm = 0b0000) then
 128         # VL in Matrix Multiply is xd*yd*zd
 129         n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
 130         vlen[0:6] <- n[14:20]  # low 7 bits, assuming n is 21 bits wide - confirm
 131         # set up template in SVSHAPE0, then copy to 1-3
 132         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 133         SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
 134         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
 135         SVSHAPE0[28:29] <- 0b11           # skip z
 136         # copy
 137         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 138         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 139         SVSHAPE3[0:31] <- SVSHAPE0[0:31]
 140         # set up FRA
 141         SVSHAPE1[18:20] <- 0b001          # permute x,z,y
 142         SVSHAPE1[28:29] <- 0b01           # skip z
 143         # FRC
 144         SVSHAPE2[18:20] <- 0b001          # permute x,z,y
 145         SVSHAPE2[28:29] <- 0b11           # skip y
 146     # set schedule up for FFT butterfly
 147     if (SVrm = 0b0001) then
 148         # calculate O(N log2 N): n counts the low-order 1 bits of SVxd
 149         n <- [0] * 3
 150         do while n < 5
 151            if SVxd[4-n] = 0 then
 152                leave
 153            n <- n + 1
 154         n <- ((0b0 || SVxd) + 1) * n  # (SVxd+1) times that bit count
 155         vlen[0:6] <- n[1:7]
 156         # set up template in SVSHAPE0, then copy to 1-2
 157         # for FRA and FRT
 158         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 159         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D FFT)
 160         mscale <- (0b0 || SVzd) + 1  # MAXVL becomes vlen * (SVzd+1)
 161         SVSHAPE0[30:31] <- 0b01          # Butterfly mode
 162         # copy
 163         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 164         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 165         # set up FRB and FRS
 166         SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
 167         # FRC (coefficients)
 168         SVSHAPE2[28:29] <- 0b10           # k schedule
 169     # set schedule up for (i)DCT Inner butterfly
 170     # SVrm Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
 171     # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
 172     if ((SVrm = 0b0010) | (SVrm = 0b0100) |
 173         (SVrm = 0b1010) | (SVrm = 0b1100)) then
 174         # calculate O(N log2 N): n counts the low-order 1 bits of SVxd
 175         n <- [0] * 3
 176         do while n < 5
 177            if SVxd[4-n] = 0 then
 178                leave
 179            n <- n + 1
 180         n <- ((0b0 || SVxd) + 1) * n  # (SVxd+1) times that bit count
 181         vlen[0:6] <- n[1:7]
 182         # set up template in SVSHAPE0, then copy to 1-3
 183         # set up FRB and FRS
 184         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 185         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 186         mscale <- (0b0 || SVzd) + 1  # MAXVL becomes vlen * (SVzd+1)
 187         if (SVrm = 0b1010) | (SVrm = 0b1100) then
 188             SVSHAPE0[30:31] <- 0b11          # iDCT mode
 189             SVSHAPE0[18:20] <- 0b011         # iDCT Inner Butterfly sub-mode
 190         else
 191             SVSHAPE0[30:31] <- 0b01          # DCT mode
 192             SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
 193             SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
 194         if (SVrm = 0b1100) | (SVrm = 0b0100) then
 195             SVSHAPE0[6:11] <- 0b000011       # (i)DCT Inner Butterfly mode 4
 196         else
 197             SVSHAPE0[6:11] <- 0b000001       # (i)DCT Inner Butterfly mode 2
 198         # copy (SVSHAPE3 only for the pre-calculated modes 2 and 10)
 199         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 200         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 201         if (SVrm != 0b0100) & (SVrm != 0b1100) then
 202             SVSHAPE3[0:31] <- SVSHAPE0[0:31]
 203         # for FRA and FRT
 204         SVSHAPE0[28:29] <- 0b01           # j+halfstep schedule
 205         # for cos coefficient
 206         SVSHAPE2[28:29] <- 0b10           # ci (k for mode 4) schedule
 207         SVSHAPE2[12:17] <- 0b000000       # reset costable "striding" to 1
 208         if (SVrm != 0b0100) & (SVrm != 0b1100) then
 209             SVSHAPE3[28:29] <- 0b11           # size schedule
 210     # set schedule up for (i)DCT Outer butterfly
 211     if (SVrm = 0b0011) | (SVrm = 0b1011) then
 212         # calculate O(N log2 N) number of outer butterfly overlapping adds
 213         vlen[0:6] <- [0] * 7
 214         n <- 0b000
 215         size <- 0b0000001
 216         itercount[0:6] <- (0b00 || SVxd) + 0b0000001  # SVxd+1
 217         itercount[0:6] <- (0b0 || itercount[0:5])  # halve (shift right by one)
 218         do while n < 5
 219            if SVxd[4-n] = 0 then
 220                leave
 221            n <- n + 1
 222            count <- (itercount - 0b0000001) * size
 223            vlen[0:6] <- vlen + count[7:13]  # accumulate this level's count
 224            size[0:6] <- (size[1:6] || 0b0)  # double (shift left by one)
 225            itercount[0:6] <- (0b0 || itercount[0:5])  # halve again for the next level
 226         # set up template in SVSHAPE0, then copy to 1-2
 227         # set up FRB and FRS
 228         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 229         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 230         mscale <- (0b0 || SVzd) + 1  # MAXVL becomes vlen * (SVzd+1)
 231         if (SVrm = 0b1011) then
 232             SVSHAPE0[30:31] <- 0b11      # iDCT mode
 233             SVSHAPE0[18:20] <- 0b011     # iDCT Outer Butterfly sub-mode
 234             SVSHAPE0[21:23] <- 0b101     # "inverse" on outer and inner loop
 235         else
 236             SVSHAPE0[30:31] <- 0b01      # DCT mode
 237             SVSHAPE0[18:20] <- 0b100     # DCT Outer Butterfly sub-mode
 238         SVSHAPE0[6:11] <- 0b000010       # DCT Butterfly mode
 239         # copy
 240         SVSHAPE1[0:31] <- SVSHAPE0[0:31] # j+halfstep schedule
 241         SVSHAPE2[0:31] <- SVSHAPE0[0:31] # costable coefficients
 242         # for FRA and FRT
 243         SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
 244         # reset costable "striding" to 1
 245         SVSHAPE2[12:17] <- 0b000000
 246     # set schedule up for DCT COS table generation
 247     if (SVrm = 0b0101) | (SVrm = 0b1101) then
 248         # calculate O(N log2 N): sum the halved size once per low-order 1 bit of SVxd
 249         vlen[0:6] <- [0] * 7
 250         itercount[0:6] <- (0b00 || SVxd) + 0b0000001  # SVxd+1
 251         itercount[0:6] <- (0b0 || itercount[0:5])  # halve (shift right by one)
 252         n <- [0] * 3
 253         do while n < 5
 254            if SVxd[4-n] = 0 then
 255                leave
 256            n <- n + 1
 257            vlen[0:6] <- vlen + itercount
 258            itercount[0:6] <- (0b0 || itercount[0:5])  # halve again for the next level
 259         # set up template in SVSHAPE0, then copy to 1-2
 260         # set up FRB and FRS
 261         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 262         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 263         mscale <- (0b0 || SVzd) + 1  # MAXVL becomes vlen * (SVzd+1)
 264         SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
 265         SVSHAPE0[6:11] <- 0b000100       # DCT Inner Butterfly COS-gen mode
 266         if (SVrm = 0b0101) then
 267             SVSHAPE0[21:23] <- 0b001     # "inverse" on outer loop for DCT
 268         # copy
 269         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 270         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 271         # for cos coefficient
 272         SVSHAPE1[28:29] <- 0b10           # ci schedule
 273         SVSHAPE2[28:29] <- 0b11           # size schedule
 274     # set schedule up for iDCT / DCT inverse of half-swapped ordering
 275     if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
 276         vlen[0:6] <- (0b00 || SVxd) + 0b0000001  # VL is simply SVxd+1
 277         # set up template in SVSHAPE0 (the only SVSHAPE written in this mode)
 278         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 279         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 280         mscale <- (0b0 || SVzd) + 1  # MAXVL becomes vlen * (SVzd+1)
 281         if (SVrm = 0b1110) then
 282             SVSHAPE0[18:20] <- 0b001     # DCT opposite half-swap
 283         if (SVrm = 0b1111) then
 284             SVSHAPE0[30:31] <- 0b01          # FFT mode
 285         else
 286             SVSHAPE0[30:31] <- 0b11          # DCT mode
 287         SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
 288     # set schedule up for parallel reduction
 289     if (SVrm = 0b0111) then
 290         # calculate the total number of operations (brute-force)
 291         vlen[0:6] <- [0] * 7
 292         itercount[0:6] <- (0b00 || SVxd) + 0b0000001  # SVxd+1
 293         step[0:6] <- 0b0000001
 294         i[0:6] <- 0b0000000  # running count of operations
 295         do while step <u itercount
 296             newstep <- step[1:6] || 0b0  # double the step (shift left by one)
 297             j[0:6] <- 0b0000000
 298             do while (j+step <u itercount)
 299                 j <- j + newstep
 300                 i <- i + 1  # one operation per (j, j+step) pair
 301             step <- newstep
 302         # VL in Parallel-Reduce is the number of operations
 303         vlen[0:6] <- i
 304         # set up template in SVSHAPE0, then copy to 1. only 2 needed
 305         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 306         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 307         mscale <- (0b0 || SVzd) + 1  # MAXVL becomes vlen * (SVzd+1)
 308         SVSHAPE0[30:31] <- 0b10          # parallel reduce submode
 309         # copy
 310         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 311         # set up right operand (left operand 28:29 is zero)
 312         SVSHAPE1[28:29] <- 0b01           # right operand
 313     # set VL, MVL and Vertical-First
 314     m[0:12] <- vlen * mscale
 315     maxvl[0:6] <- m[6:12]  # low 7 bits of the 13-bit product
 316     SVSTATE[0:6] <- maxvl  # MAXVL
 317     SVSTATE[7:13] <- vlen  # VL
 318     SVSTATE[63] <- vf
 319
 320 Special Registers Altered:
 321
 322     None
 323
 324 # svindex
 325
 326 SVI-Form
 327
 328 * svindex SVG,rmm,SVd,ew,SVyx,mm,sk
 329
 330 Pseudo-code:
 331
 332     # find the smallest d with d*dim >= MAXVL: the size of the other dimension
 333     MVL <- SVSTATE[0:6]
 334     d <- [0] * 6
 335     dim <- SVd+1
 336     do while d*dim <u ([0]*4 || MVL)
 337        d <- d + 1
 338     # set up template, then copy once location identified
 339     shape <- [0]*32
 340     shape[30:31] <- 0b00            # mode
 341     if SVyx = 0 then
 342         shape[18:20] <- 0b110       # indexed xd/yd
 343         shape[0:5] <- (0b0 || SVd)  # xdim
 344         if sk = 0 then shape[6:11] <- 0 # ydim
 345         else           shape[6:11] <- 0b111111 # ydim max
 346     else
 347         shape[18:20] <- 0b111       # indexed yd/xd
 348         if sk = 1 then shape[6:11] <- 0 # ydim
 349         else           shape[6:11] <- d-1 # ydim max
 350         shape[0:5] <- (0b0 || SVd) # ydim
 351     shape[12:17] <- (0b0 || SVG)        # SVGPR
 352     shape[28:29] <- ew                  # element-width override
 353     # sk lives in shape[21]; writing it to shape[28:29] would clobber ew
 354     shape[21] <- sk                     # skip 1st dimension
 355     # select the mode for updating SVSHAPEs
 356     SVSTATE[62] <- mm # set or clear persistence
 357     if mm = 0 then
 358         # clear out all SVSHAPEs first
 359         SVSHAPE0[0:31] <- [0] * 32
 360         SVSHAPE1[0:31] <- [0] * 32
 361         SVSHAPE2[0:31] <- [0] * 32
 362         SVSHAPE3[0:31] <- [0] * 32
 363         SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
 364         SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
 365         idx <- 0
 366         for bit = 0 to 4
 367             if rmm[4-bit] then
 368                 # activate requested shape
 369                 if idx = 0 then SVSHAPE0 <- shape
 370                 if idx = 1 then SVSHAPE1 <- shape
 371                 if idx = 2 then SVSHAPE2 <- shape
 372                 if idx = 3 then SVSHAPE3 <- shape
 373                 SVSTATE[bit*2+32:bit*2+33] <- idx  # point this register's selector at it
 374                 # increment shape index, modulo 4
 375                 if idx = 3 then idx <- 0
 376                 else            idx <- idx + 1
 377     else
 378         # refined SVSHAPE/REMAP update mode
 379         bit <- rmm[0:2]  # which register selector to retarget
 380         idx <- rmm[3:4]  # which SVSHAPE receives the new shape
 381         if idx = 0 then SVSHAPE0 <- shape
 382         if idx = 1 then SVSHAPE1 <- shape
 383         if idx = 2 then SVSHAPE2 <- shape
 384         if idx = 3 then SVSHAPE3 <- shape
 385         SVSTATE[bit*2+32:bit*2+33] <- idx
 386         SVSTATE[46-bit] <- 1  # set the matching enable bit within REMAP.SVme (42:46)
 387
 388 Special Registers Altered:
 389
 390     None
 391
 392 # svshape2
 393
 394 SVM2-Form
 395
 396 * svshape2 SVo,SVyx,rmm,SVd,sk,mm
 397
 398 Pseudo-code:
 399
 400     # find the smallest d with d*dim >= MAXVL: the size of the other dimension
 401     MVL <- SVSTATE[0:6]
 402     d <- [0] * 6
 403     dim <- SVd+1
 404     do while d*dim <u ([0]*4 || MVL)
 405        d <- d + 1
 406     # set up template, then copy once location identified
 407     shape <- [0]*32
 408     shape[30:31] <- 0b00            # mode
 409     shape[0:5] <- (0b0 || SVd)      # x/ydim
 410     if SVyx = 0 then
 411         shape[18:20] <- 0b000       # ordering xd/yd(/zd)
 412         if sk = 0 then shape[6:11] <- 0 # ydim
 413         else           shape[6:11] <- 0b111111 # ydim max
 414     else
 415         shape[18:20] <- 0b010       # ordering yd/xd(/zd)
 416         if sk = 1 then shape[6:11] <- 0 # ydim
 417         else           shape[6:11] <- d-1 # ydim max
 418     # offset (the prime purpose of this instruction)
 419     shape[24:27] <- SVo         # offset
 420     if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
 421     else           shape[28:29] <- 0b00 # no skipping
 422     # select the mode for updating SVSHAPEs
 423     SVSTATE[62] <- mm # set or clear persistence
 424     if mm = 0 then
 425         # clear out all SVSHAPEs first
 426         SVSHAPE0[0:31] <- [0] * 32
 427         SVSHAPE1[0:31] <- [0] * 32
 428         SVSHAPE2[0:31] <- [0] * 32
 429         SVSHAPE3[0:31] <- [0] * 32
 430         SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
 431         SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
 432         idx <- 0
 433         for bit = 0 to 4
 434             if rmm[4-bit] then
 435                 # activate requested shape
 436                 if idx = 0 then SVSHAPE0 <- shape
 437                 if idx = 1 then SVSHAPE1 <- shape
 438                 if idx = 2 then SVSHAPE2 <- shape
 439                 if idx = 3 then SVSHAPE3 <- shape
 440                 SVSTATE[bit*2+32:bit*2+33] <- idx  # point this register's selector at it
 441                 # increment shape index, modulo 4
 442                 if idx = 3 then idx <- 0
 443                 else            idx <- idx + 1
 444     else
 445         # refined SVSHAPE/REMAP update mode
 446         bit <- rmm[0:2]  # which register selector to retarget
 447         idx <- rmm[3:4]  # which SVSHAPE receives the new shape
 448         if idx = 0 then SVSHAPE0 <- shape
 449         if idx = 1 then SVSHAPE1 <- shape
 450         if idx = 2 then SVSHAPE2 <- shape
 451         if idx = 3 then SVSHAPE3 <- shape
 452         SVSTATE[bit*2+32:bit*2+33] <- idx
 453         SVSTATE[46-bit] <- 1  # set the matching enable bit within REMAP.SVme (42:46)
 454
 455 Special Registers Altered:
 456
 457     None
 458