openpower/isa/simplev.mdwn

   1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
   2 <!-- These are not described in book 1 -->
   3
   4 # svstep
   5
   6 SVL-Form
   7
   8 * svstep RT,SVi,vf (Rc=0)
   9 * svstep. RT,SVi,vf (Rc=1)
  10
  11 Pseudo-code:
  12
  13     if SVi[3:4] != 0b11 then
  14         # ordinary request: RT receives the zero-extended SVSTATE_NEXT result
  15         nextstep <- SVSTATE_NEXT(SVi, vf)
  16         RT <- [0]*57 || nextstep
  17     else
  18         SVSTATE[53] <- SVi[5]  # SVi[3:4] = 0b11: latch SVi[5], SVi[6] into SVSTATE
  19         SVSTATE[54] <- SVi[6]
  20         RT <- [0]*62 || SVSTATE[53:54]
  21
  22 Special Registers Altered:
  23
  24     CR0                     (if Rc=1)
  25
  26 # setvl
  27
  28 SVL-Form
  29
  30 * setvl RT,RA,SVi,vf,vs,ms (Rc=0)
  31 * setvl. RT,RA,SVi,vf,vs,ms (Rc=1)
  32
  33 Pseudo-code:
  34
  35     overflow <- 0b0  # becomes 1 whenever a requested VL has to be clamped
  36     if (vf & (¬vs) & ¬(ms)) = 1 then
  37         step <- SVSTATE_NEXT(SVi, 0b0)  # vf alone (vs=0, ms=0): step request only
  38         if _RT != 0 then
  39            GPR(_RT) <- [0]*57 || step
  40     else
  41         VLimm <- SVi + 1
  42         # MVL: taken from the immediate when ms=1, otherwise kept from SVSTATE
  43         if ms = 1 then MVL <- VLimm[0:6]
  44         else           MVL <- SVSTATE[0:6]
  45         # VL: kept when vs=0; else (RA) if _RA != 0, else VLimm if _RT = 0, else CTR
  46         if vs = 0                then VL <- SVSTATE[7:13]
  47         else if _RA != 0         then
  48             if (RA) >u 0b1111111 then
  49                 VL <- 0b1111111  # saturate at 127
  50                 overflow <- 0b1
  51             else                      VL <- (RA)[57:63]
  53         else if _RT = 0          then VL <- VLimm[0:6]
  54         else if CTR >u 0b1111111 then
  55             VL <- 0b1111111  # saturate at 127
  56             overflow <- 0b1
  57         else                          VL <- CTR[57:63]
  58         # limit VL to within MVL (also counts as overflow)
  59         if VL >u MVL then
  60             overflow <- 0b1
  61             VL <- MVL
  62         SVSTATE[0:6] <- MVL
  63         SVSTATE[7:13] <- VL
  64         if _RT != 0 then
  65            GPR(_RT) <- [0]*57 || VL
  66         if ((¬vs) & ¬(ms)) = 0 then
  67             # vs or ms given: set requested Vertical-First mode, clear persist
  68             SVSTATE[63] <- vf
  69             SVSTATE[62] <- 0b0
  70
  71 Special Registers Altered:
  72
  73     CR0                     (if Rc=1)
  74
  75 # svremap
  76
  77 SVRM-Form
  78
  79 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
  80
  81 Pseudo-code:
  82
  83     # persistence: the REMAP stays in force for more than one instruction
  84     SVSTATE[62] <- pst
  85     # per-register enable bits, one each for RA RB RC RT EA/FRS
  86     SVSTATE[42:46] <- SVme
  87     # SVSHAPE0-3 index chosen for each of RA RB RC RT EA/FRS
  88     SVSTATE[40:41] <- mo1
  89     SVSTATE[38:39] <- mo0
  90     SVSTATE[36:37] <- mi2
  91     SVSTATE[34:35] <- mi1
  92     SVSTATE[32:33] <- mi0
  93
  94 Special Registers Altered:
  95
  96     None
  97
  98 # svshape
  99
 100 SVM-Form
 101
 102 * svshape SVxd,SVyd,SVzd,SVrm,vf
 103
 104 Pseudo-code:
 105
 106     # for convenience, VL to be calculated and stored in SVSTATE
 107     vlen <- [0] * 7
 108     mscale[0:5] <- 0b000001 # for scaling MAXVL: MAXVL is vlen * mscale (see end)
 109     itercount[0:6] <- [0] * 7
 110     SVSTATE[0:31] <- [0] * 32  # bits 0:13 (MAXVL, VL) are rewritten at the end
 111     # only overwrite REMAP if "persistence" is zero
 112     if (SVSTATE[62] = 0b0) then
 113         SVSTATE[32:33] <- 0b00
 114         SVSTATE[34:35] <- 0b00
 115         SVSTATE[36:37] <- 0b00
 116         SVSTATE[38:39] <- 0b00
 117         SVSTATE[40:41] <- 0b00
 118         SVSTATE[42:46] <- 0b00000
 119         SVSTATE[62] <- 0b0  # redundant: already zero inside this test
 120         SVSTATE[63] <- 0b0  # overwritten with vf at the end
 121     # clear out all SVSHAPEs
 122     SVSHAPE0[0:31] <- [0] * 32
 123     SVSHAPE1[0:31] <- [0] * 32
 124     SVSHAPE2[0:31] <- [0] * 32
 125     SVSHAPE3[0:31] <- [0] * 32
 126     # set schedule up for multiply
 127     if (SVrm = 0b0000) then
 128         # VL in Matrix Multiply is xd*yd*zd
 129         xd <- (0b00 || SVxd) + 1
 130         yd <- (0b00 || SVyd) + 1
 131         zd <- (0b00 || SVzd) + 1
 132         n <- xd * yd * zd
 133         vlen[0:6] <- n[14:20]
 134         # set up template in SVSHAPE0, then copy to 1-3
 135         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 136         SVSHAPE0[6:11] <- (0b0 || SVyd)   # ydim
 137         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim
 138         SVSHAPE0[28:29] <- 0b11           # skip z
 139         # copy
 140         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 141         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 142         SVSHAPE3[0:31] <- SVSHAPE0[0:31]
 143         # set up FRA
 144         SVSHAPE1[18:20] <- 0b001          # permute x,z,y
 145         SVSHAPE1[28:29] <- 0b01           # skip z
 146         # FRC
 147         SVSHAPE2[18:20] <- 0b001          # permute x,z,y
 148         SVSHAPE2[28:29] <- 0b11           # skip y
 149     # set schedule up for FFT butterfly
 150     if (SVrm = 0b0001) then
 151         # calculate O(N log2 N): n counts the consecutive 1 bits of SVxd from bit 4
 152         n <- [0] * 3
 153         do while n < 5
 154            if SVxd[4-n] = 0 then
 155                leave
 156            n <- n + 1
 157         n <- ((0b0 || SVxd) + 1) * n
 158         vlen[0:6] <- n[1:7]
 159         # set up template in SVSHAPE0, then copy to 1-2
 160         # for FRA and FRT
 161         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 162         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D FFT)
 163         mscale <- (0b0 || SVzd) + 1
 164         SVSHAPE0[30:31] <- 0b01          # Butterfly mode
 165         # copy
 166         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 167         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 168         # set up FRB and FRS
 169         SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
 170         # FRC (coefficients)
 171         SVSHAPE2[28:29] <- 0b10           # k schedule
 172     # set schedule up for (i)DCT Inner butterfly
 173     # SVrm Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
 174     # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
 175     if ((SVrm = 0b0010) | (SVrm = 0b0100) |
 176         (SVrm = 0b1010) | (SVrm = 0b1100)) then
 177         # calculate O(N log2 N): n counts the consecutive 1 bits of SVxd from bit 4
 178         n <- [0] * 3
 179         do while n < 5
 180            if SVxd[4-n] = 0 then
 181                leave
 182            n <- n + 1
 183         n <- ((0b0 || SVxd) + 1) * n
 184         vlen[0:6] <- n[1:7]
 185         # set up template in SVSHAPE0, then copy to 1-2 (and 3 for modes 2 and 10)
 186         # set up FRB and FRS
 187         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 188         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 189         mscale <- (0b0 || SVzd) + 1
 190         if (SVrm = 0b1010) | (SVrm = 0b1100) then
 191             SVSHAPE0[30:31] <- 0b11          # iDCT mode
 192             SVSHAPE0[18:20] <- 0b011         # iDCT Inner Butterfly sub-mode
 193         else
 194             SVSHAPE0[30:31] <- 0b01          # DCT mode
 195             SVSHAPE0[18:20] <- 0b001         # DCT Inner Butterfly sub-mode
 196             SVSHAPE0[21:23] <- 0b001         # "inverse" on outer loop
 197         if (SVrm = 0b1100) | (SVrm = 0b0100) then
 198             SVSHAPE0[6:11] <- 0b000011       # (i)DCT Inner Butterfly mode 4
 199         else
 200             SVSHAPE0[6:11] <- 0b000001       # (i)DCT Inner Butterfly mode 2
 201         # copy
 202         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 203         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 204         if (SVrm != 0b0100) & (SVrm != 0b1100) then
 205             SVSHAPE3[0:31] <- SVSHAPE0[0:31]
 206         # for FRA and FRT
 207         SVSHAPE0[28:29] <- 0b01           # j+halfstep schedule
 208         # for cos coefficient
 209         SVSHAPE2[28:29] <- 0b10           # ci (k for mode 4) schedule
 210         SVSHAPE2[12:17] <- 0b000000       # reset costable "striding" to 1
 211         if (SVrm != 0b0100) & (SVrm != 0b1100) then
 212             SVSHAPE3[28:29] <- 0b11           # size schedule
 213     # set schedule up for (i)DCT Outer butterfly
 214     if (SVrm = 0b0011) | (SVrm = 0b1011) then
 215         # calculate O(N log2 N) number of outer butterfly overlapping adds
 216         vlen[0:6] <- [0] * 7
 217         n <- 0b000
 218         size <- 0b0000001
 219         itercount[0:6] <- (0b00 || SVxd) + 0b0000001
 220         itercount[0:6] <- (0b0 || itercount[0:5])  # shift right by one: halve
 221         do while n < 5
 222            if SVxd[4-n] = 0 then
 223                leave
 224            n <- n + 1
 225            count <- (itercount - 0b0000001) * size
 226            vlen[0:6] <- vlen + count[7:13]
 227            size[0:6] <- (size[1:6] || 0b0)  # shift left by one: double
 228            itercount[0:6] <- (0b0 || itercount[0:5])  # shift right by one: halve
 229         # set up template in SVSHAPE0, then copy to 1-2
 230         # set up FRB and FRS
 231         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 232         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 233         mscale <- (0b0 || SVzd) + 1
 234         if (SVrm = 0b1011) then
 235             SVSHAPE0[30:31] <- 0b11      # iDCT mode
 236             SVSHAPE0[18:20] <- 0b011     # iDCT Outer Butterfly sub-mode
 237             SVSHAPE0[21:23] <- 0b101     # "inverse" on outer and inner loop
 238         else
 239             SVSHAPE0[30:31] <- 0b01      # DCT mode
 240             SVSHAPE0[18:20] <- 0b100     # DCT Outer Butterfly sub-mode
 241         SVSHAPE0[6:11] <- 0b000010       # DCT Butterfly mode
 242         # copy
 243         SVSHAPE1[0:31] <- SVSHAPE0[0:31] # j+halfstep schedule
 244         SVSHAPE2[0:31] <- SVSHAPE0[0:31] # costable coefficients
 245         # for FRA and FRT
 246         SVSHAPE1[28:29] <- 0b01           # j+halfstep schedule
 247         # reset costable "striding" to 1
 248         SVSHAPE2[12:17] <- 0b000000
 249     # set schedule up for DCT COS table generation
 250     if (SVrm = 0b0101) | (SVrm = 0b1101) then
 251         # calculate O(N log2 N): vlen accumulates the repeatedly-halved itercount
 252         vlen[0:6] <- [0] * 7
 253         itercount[0:6] <- (0b00 || SVxd) + 0b0000001
 254         itercount[0:6] <- (0b0 || itercount[0:5])  # shift right by one: halve
 255         n <- [0] * 3
 256         do while n < 5
 257            if SVxd[4-n] = 0 then
 258                leave
 259            n <- n + 1
 260            vlen[0:6] <- vlen + itercount
 261            itercount[0:6] <- (0b0 || itercount[0:5])  # shift right by one: halve
 262         # set up template in SVSHAPE0, then copy to 1-2
 263         # set up FRB and FRS
 264         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 265         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 266         mscale <- (0b0 || SVzd) + 1
 267         SVSHAPE0[30:31] <- 0b01          # DCT/FFT mode
 268         SVSHAPE0[6:11] <- 0b000100       # DCT Inner Butterfly COS-gen mode
 269         if (SVrm = 0b0101) then
 270             SVSHAPE0[21:23] <- 0b001     # "inverse" on outer loop for DCT
 271         # copy
 272         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 273         SVSHAPE2[0:31] <- SVSHAPE0[0:31]
 274         # for cos coefficient
 275         SVSHAPE1[28:29] <- 0b10           # ci schedule
 276         SVSHAPE2[28:29] <- 0b11           # size schedule
 277     # set schedule up for iDCT / DCT inverse of half-swapped ordering
 278     if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
 279         vlen[0:6] <- (0b00 || SVxd) + 0b0000001
 280         # set up template in SVSHAPE0 (SVSHAPE1-3 stay cleared in this mode)
 281         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 282         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 283         mscale <- (0b0 || SVzd) + 1
 284         if (SVrm = 0b1110) then
 285             SVSHAPE0[18:20] <- 0b001     # DCT opposite half-swap
 286         if (SVrm = 0b1111) then
 287             SVSHAPE0[30:31] <- 0b01          # FFT mode
 288         else
 289             SVSHAPE0[30:31] <- 0b11          # DCT mode
 290         SVSHAPE0[6:11] <- 0b000101       # DCT "half-swap" mode
 291     # set schedule up for parallel reduction
 292     if (SVrm = 0b0111) then
 293         # calculate the total number of operations (brute-force)
 294         vlen[0:6] <- [0] * 7
 295         itercount[0:6] <- (0b00 || SVxd) + 0b0000001
 296         step[0:6] <- 0b0000001
 297         i[0:6] <- 0b0000000
 298         do while step <u itercount
 299             newstep <- step[1:6] || 0b0  # step shifted left by one: doubled
 300             j[0:6] <- 0b0000000
 301             do while (j+step <u itercount)
 302                 j <- j + newstep
 303                 i <- i + 1  # one operation counted per inner iteration
 304             step <- newstep
 305         # VL in Parallel-Reduce is the number of operations
 306         vlen[0:6] <- i
 307         # set up template in SVSHAPE0, then copy to 1. only 2 needed
 308         SVSHAPE0[0:5] <- (0b0 || SVxd)   # xdim
 309         SVSHAPE0[12:17] <- (0b0 || SVzd)   # zdim - "striding" (2D DCT)
 310         mscale <- (0b0 || SVzd) + 1
 311         SVSHAPE0[30:31] <- 0b10          # parallel reduce submode
 312         # copy
 313         SVSHAPE1[0:31] <- SVSHAPE0[0:31]
 314         # set up right operand (left operand 28:29 is zero)
 315         SVSHAPE1[28:29] <- 0b01           # right operand
 316     # set VL, MVL and Vertical-First
 317     m[0:12] <- vlen * mscale
 318     maxvl[0:6] <- m[6:12]
 319     SVSTATE[0:6] <- maxvl  # MAXVL
 320     SVSTATE[7:13] <- vlen  # VL
 321     SVSTATE[63] <- vf
 322
 323 Special Registers Altered:
 324
 325     None
 326
 327 # svindex
 328
 329 SVI-Form
 330
 331 * svindex SVG,rmm,SVd,ew,SVyx,mm,sk
 332
 333 Pseudo-code:
 334
 335     # based on nearest MAXVL compute other dimension
 336     MVL <- SVSTATE[0:6]
 337     d <- [0] * 6
 338     dim <- SVd+1
 339     do while d*dim <u ([0]*4 || MVL)
 340        d <- d + 1  # ends at the smallest d for which d*dim is not below MVL
 341     # set up template, then copy once location identified
 342     shape <- [0]*32
 343     shape[30:31] <- 0b00            # mode
 344     if SVyx = 0 then
 345         shape[18:20] <- 0b110       # indexed xd/yd
 346         shape[0:5] <- (0b0 || SVd)  # xdim
 347         if sk = 0 then shape[6:11] <- 0 # ydim
 348         else           shape[6:11] <- 0b111111 # ydim max
 349     else
 350         shape[18:20] <- 0b111       # indexed yd/xd
 351         if sk = 1 then shape[6:11] <- 0 # ydim (NOTE(review): sk sense is opposite to the SVyx=0 arm - confirm)
 352         else           shape[6:11] <- d-1 # ydim max
 353         shape[0:5] <- (0b0 || SVd) # ydim
 354     shape[12:17] <- (0b0 || SVG)        # SVGPR
 355     shape[28:29] <- ew                  # element-width override (NOTE(review): overwritten by both arms of the sk test below - confirm intended field)
 356     if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
 357     else           shape[28:29] <- 0b00 # no skipping
 358     # select the mode for updating SVSHAPEs
 359     SVSTATE[62] <- mm # set or clear persistence
 360     if mm = 0 then
 361         # clear out all SVSHAPEs first
 362         SVSHAPE0[0:31] <- [0] * 32
 363         SVSHAPE1[0:31] <- [0] * 32
 364         SVSHAPE2[0:31] <- [0] * 32
 365         SVSHAPE3[0:31] <- [0] * 32
 366         SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
 367         SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
 368         idx <- 0
 369         for bit = 0 to 4
 370             if rmm[4-bit] then
 371                 # activate requested shape: each set rmm bit takes the next SVSHAPE in turn
 372                 if idx = 0 then SVSHAPE0 <- shape
 373                 if idx = 1 then SVSHAPE1 <- shape
 374                 if idx = 2 then SVSHAPE2 <- shape
 375                 if idx = 3 then SVSHAPE3 <- shape
 376                 SVSTATE[bit*2+32:bit*2+33] <- idx
 377                 # increment shape index, modulo 4
 378                 if idx = 3 then idx <- 0
 379                 else            idx <- idx + 1
 380     else
 381         # refined SVSHAPE/REMAP update mode: rmm names one slot and one SVSHAPE
 382         bit <- rmm[0:2]
 383         idx <- rmm[3:4]
 384         if idx = 0 then SVSHAPE0 <- shape
 385         if idx = 1 then SVSHAPE1 <- shape
 386         if idx = 2 then SVSHAPE2 <- shape
 387         if idx = 3 then SVSHAPE3 <- shape
 388         SVSTATE[bit*2+32:bit*2+33] <- idx
 389         SVSTATE[46-bit] <- 1  # same bit position that rmm[4-bit] occupies in the mm=0 path
 390
 391 Special Registers Altered:
 392
 393     None
 394
 395 # svshape2
 396
 397 SVM2-Form
 398
 399 * svshape2 SVo,SVyx,rmm,SVd,sk,mm
 400
 401 Pseudo-code:
 402
 403     # based on nearest MAXVL compute other dimension
 404     MVL <- SVSTATE[0:6]
 405     d <- [0] * 6
 406     dim <- SVd+1
 407     do while d*dim <u ([0]*4 || MVL)
 408        d <- d + 1  # ends at the smallest d for which d*dim is not below MVL
 409     # set up template, then copy once location identified
 410     shape <- [0]*32
 411     shape[30:31] <- 0b00            # mode
 412     shape[0:5] <- (0b0 || SVd)      # x/ydim
 413     if SVyx = 0 then
 414         shape[18:20] <- 0b000       # ordering xd/yd(/zd)
 415         if sk = 0 then shape[6:11] <- 0 # ydim
 416         else           shape[6:11] <- 0b111111 # ydim max
 417     else
 418         shape[18:20] <- 0b010       # ordering yd/xd(/zd)
 419         if sk = 1 then shape[6:11] <- 0 # ydim (NOTE(review): sk sense is opposite to the SVyx=0 arm - confirm)
 420         else           shape[6:11] <- d-1 # ydim max
 421     # offset (the prime purpose of this instruction)
 422     shape[24:27] <- SVo         # offset
 423     if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
 424     else           shape[28:29] <- 0b00 # no skipping
 425     # select the mode for updating SVSHAPEs
 426     SVSTATE[62] <- mm # set or clear persistence
 427     if mm = 0 then
 428         # clear out all SVSHAPEs first
 429         SVSHAPE0[0:31] <- [0] * 32
 430         SVSHAPE1[0:31] <- [0] * 32
 431         SVSHAPE2[0:31] <- [0] * 32
 432         SVSHAPE3[0:31] <- [0] * 32
 433         SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
 434         SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
 435         idx <- 0
 436         for bit = 0 to 4
 437             if rmm[4-bit] then
 438                 # activate requested shape: each set rmm bit takes the next SVSHAPE in turn
 439                 if idx = 0 then SVSHAPE0 <- shape
 440                 if idx = 1 then SVSHAPE1 <- shape
 441                 if idx = 2 then SVSHAPE2 <- shape
 442                 if idx = 3 then SVSHAPE3 <- shape
 443                 SVSTATE[bit*2+32:bit*2+33] <- idx
 444                 # increment shape index, modulo 4
 445                 if idx = 3 then idx <- 0
 446                 else            idx <- idx + 1
 447     else
 448         # refined SVSHAPE/REMAP update mode: rmm names one slot and one SVSHAPE
 449         bit <- rmm[0:2]
 450         idx <- rmm[3:4]
 451         if idx = 0 then SVSHAPE0 <- shape
 452         if idx = 1 then SVSHAPE1 <- shape
 453         if idx = 2 then SVSHAPE2 <- shape
 454         if idx = 3 then SVSHAPE3 <- shape
 455         SVSTATE[bit*2+32:bit*2+33] <- idx
 456         SVSTATE[46-bit] <- 1  # same bit position that rmm[4-bit] occupies in the mm=0 path
 457
 458 Special Registers Altered:
 459
 460     None
 461