1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
2 <!-- These are not described in book 1 -->
8 * svstep RT,SVi,vf (Rc=0)
9 * svstep. RT,SVi,vf (Rc=1)
13 if SVi[3:4] = 0b11 then
14 # store pack and unpack in SVSTATE
17 RT <- [0]*62 || SVSTATE[53:54]
19 step <- SVSTATE_NEXT(SVi, vf)
22 Special Registers Altered:
30 * setvl RT,RA,SVi,vf,vs,ms (Rc=0)
31 * setvl. RT,RA,SVi,vf,vs,ms (Rc=1)
38 if ms = 1 then MVL <- VLimm[0:6]
39 else MVL <- SVSTATE[0:6]
41 if vs = 0 then VL <- SVSTATE[7:13]
43 if (RA) >u 0b1111111 then
46 else VL <- (RA)[57:63]
47 else if _RT = 0 then VL <- VLimm[0:6]
48 else if CTR >u 0b1111111 then
52 # limit VL to within MVL
59 GPR(_RT) <- [0]*57 || VL
60 # MAXVL is a static "state-reset".
62 SVSTATE[63] <- vf # set Vertical-First mode
63 SVSTATE[62] <- 0b0 # clear persist bit
65 Special Registers Altered:
73 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
77 # registers RA RB RC RT EA/FRS SVSHAPE0-3 indices
83 # enable bit for RA RB RC RT EA/FRS
84 SVSTATE[42:46] <- SVme
85 # persistence bit (applies to more than one instruction)
88 Special Registers Altered:
96 * svshape SVxd,SVyd,SVzd,SVrm,vf
100 # for convenience, VL to be calculated and stored in SVSTATE
102 mscale[0:5] <- 0b000001 # for scaling MAXVL
103 itercount[0:6] <- [0] * 7
104 SVSTATE[0:31] <- [0] * 32
105 # only overwrite REMAP if "persistence" is zero
106 if (SVSTATE[62] = 0b0) then
107 SVSTATE[32:33] <- 0b00
108 SVSTATE[34:35] <- 0b00
109 SVSTATE[36:37] <- 0b00
110 SVSTATE[38:39] <- 0b00
111 SVSTATE[40:41] <- 0b00
112 SVSTATE[42:46] <- 0b00000
115 # clear out all SVSHAPEs
116 SVSHAPE0[0:31] <- [0] * 32
117 SVSHAPE1[0:31] <- [0] * 32
118 SVSHAPE2[0:31] <- [0] * 32
119 SVSHAPE3[0:31] <- [0] * 32
120 # set schedule up for multiply
121 if (SVrm = 0b0000) then
122 # VL in Matrix Multiply is xd*yd*zd
123 xd <- (0b00 || SVxd) + 1
124 yd <- (0b00 || SVyd) + 1
125 zd <- (0b00 || SVzd) + 1
127 vlen[0:6] <- n[14:20]
128 # set up template in SVSHAPE0, then copy to 1-3
129 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
130 SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
131 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
132 SVSHAPE0[28:29] <- 0b11 # skip z
134 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
135 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
136 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
138 SVSHAPE1[18:20] <- 0b001 # permute x,z,y
139 SVSHAPE1[28:29] <- 0b01 # skip z
141 SVSHAPE2[18:20] <- 0b001 # permute x,z,y
142 SVSHAPE2[28:29] <- 0b11 # skip y
143 # set schedule up for FFT butterfly
144 if (SVrm = 0b0001) then
145 # calculate O(N log2 N)
148 if SVxd[4-n] = 0 then
151 n <- ((0b0 || SVxd) + 1) * n
153 # set up template in SVSHAPE0, then copy to 1-3
155 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
156 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D FFT)
157 mscale <- (0b0 || SVzd) + 1
158 SVSHAPE0[30:31] <- 0b01 # Butterfly mode
160 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
161 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
163 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
165 SVSHAPE2[28:29] <- 0b10 # k schedule
166 # set schedule up for (i)DCT Inner butterfly
167 # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
168 if ((SVrm = 0b0100) |
169 (SVrm = 0b1100)) then
170 # calculate O(N log2 N)
173 if SVxd[4-n] = 0 then
176 n <- ((0b0 || SVxd) + 1) * n
178 # set up template in SVSHAPE0, then copy to 1-3
180 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
181 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
182 mscale <- (0b0 || SVzd) + 1
183 if (SVrm = 0b1100) then
184 SVSHAPE0[30:31] <- 0b11 # iDCT mode
185 SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
187 SVSHAPE0[30:31] <- 0b01 # DCT mode
188 SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
189 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
190 SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
192 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
193 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
# NOTE(review): this section is selected only when SVrm is 0b0100 or 0b1100;
# if the test below is nested inside that branch it can never be true, so
# SVSHAPE3 is not copied from the template here - confirm intent
194 if (SVrm != 0b0100) & (SVrm != 0b1100) then
195 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
197 SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
198 # for cos coefficient
199 SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
200 SVSHAPE2[12:17] <- 0b000000 # reset costable "striding" to 1
# NOTE(review): same guard as on the SVSHAPE3 copy above; likewise always
# false if nested in the 0b0100/0b1100 branch - confirm intent
201 if (SVrm != 0b0100) & (SVrm != 0b1100) then
202 SVSHAPE3[28:29] <- 0b11 # size schedule
203 # set schedule up for (i)DCT Outer butterfly
204 if (SVrm = 0b0011) | (SVrm = 0b1011) then
205 # calculate O(N log2 N) number of outer butterfly overlapping adds
209 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
210 itercount[0:6] <- (0b0 || itercount[0:5])
212 if SVxd[4-n] = 0 then
215 count <- (itercount - 0b0000001) * size
216 vlen[0:6] <- vlen + count[7:13]
217 size[0:6] <- (size[1:6] || 0b0)
218 itercount[0:6] <- (0b0 || itercount[0:5])
219 # set up template in SVSHAPE0, then copy to 1-3
221 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
222 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
223 mscale <- (0b0 || SVzd) + 1
224 if (SVrm = 0b1011) then
225 SVSHAPE0[30:31] <- 0b11 # iDCT mode
226 SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
227 SVSHAPE0[21:23] <- 0b101 # "inverse" on outer and inner loop
229 SVSHAPE0[30:31] <- 0b01 # DCT mode
230 SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
231 SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
233 SVSHAPE1[0:31] <- SVSHAPE0[0:31] # j+halfstep schedule
234 SVSHAPE2[0:31] <- SVSHAPE0[0:31] # costable coefficients
236 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
237 # reset costable "striding" to 1
238 SVSHAPE2[12:17] <- 0b000000
239 # set schedule up for DCT COS table generation
240 if (SVrm = 0b0101) | (SVrm = 0b1101) then
241 # calculate O(N log2 N)
243 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
244 itercount[0:6] <- (0b0 || itercount[0:5])
247 if SVxd[4-n] = 0 then
250 vlen[0:6] <- vlen + itercount
251 itercount[0:6] <- (0b0 || itercount[0:5])
252 # set up template in SVSHAPE0, then copy to 1-3
254 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
255 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
256 mscale <- (0b0 || SVzd) + 1
257 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
258 SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
259 if (SVrm = 0b0101) then
260 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop for DCT
262 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
263 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
264 # for cos coefficient
265 SVSHAPE1[28:29] <- 0b10 # ci schedule
266 SVSHAPE2[28:29] <- 0b11 # size schedule
267 # set schedule up for iDCT / DCT inverse of half-swapped ordering
268 if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
269 vlen[0:6] <- (0b00 || SVxd) + 0b0000001
270 # set up template in SVSHAPE0
271 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
272 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
273 mscale <- (0b0 || SVzd) + 1
274 if (SVrm = 0b1110) then
275 SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
276 if (SVrm = 0b1111) then
277 SVSHAPE0[30:31] <- 0b01 # FFT mode
279 SVSHAPE0[30:31] <- 0b11 # DCT mode
280 SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
281 # set schedule up for parallel reduction or prefix-sum
282 if (SVrm = 0b0111) then
285 # calculate the total number of operations (brute-force)
287 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
289 # prefix sum algorithm with operations replaced with
293 do while dist <u itercount
294 start <- dist * 2 - 1
297 do while i <u itercount
298 vlen[0:6] <- vlen[0:6] + 1
304 do while i <u itercount
305 vlen[0:6] <- vlen[0:6] + 1
311 do while step <u itercount
312 newstep <- step[1:6] || 0b0
314 do while (j+step <u itercount)
318 # VL in Parallel-Reduce is the number of operations
320 # set up template in SVSHAPE0, then copy to 1. only 2 needed
321 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
322 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
323 mscale <- (0b0 || SVzd) + 1
324 SVSHAPE0[30:31] <- 0b10 # parallel reduce/prefix submode
326 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
327 # set up submodes: parallel or prefix
328 SVSHAPE0[28:29] <- 0b00 # left operand
329 SVSHAPE1[28:29] <- 0b01 # right operand
331 SVSHAPE0[28:29] <- 0b10 # left operand
332 SVSHAPE1[28:29] <- 0b11 # right operand
333 # set VL, MVL and Vertical-First
334 m[0:12] <- vlen * mscale # scale VL by mscale; used below to derive MAXVL
335 maxvl[0:6] <- m[6:12]
336 SVSTATE[0:6] <- maxvl # MAXVL
337 SVSTATE[7:13] <- vlen # VL
340 Special Registers Altered:
348 * svindex SVG,rmm,SVd,ew,SVyx,mm,sk
352 # based on nearest MAXVL compute other dimension
356 do while d*dim <u ([0]*4 || MVL)
358 # set up template, then copy once location identified
360 shape[30:31] <- 0b00 # mode
362 shape[18:20] <- 0b110 # indexed xd/yd
363 shape[0:5] <- (0b0 || SVd) # xdim
364 if sk = 0 then shape[6:11] <- 0 # ydim
365 else shape[6:11] <- 0b111111 # ydim max
367 shape[18:20] <- 0b111 # indexed yd/xd
368 if sk = 1 then shape[6:11] <- 0 # ydim
369 else shape[6:11] <- d-1 # ydim max
370 shape[0:5] <- (0b0 || SVd) # SVd into bits 0:5 (labelled xdim above); presumably the y extent under yd/xd ordering - TODO confirm
371 shape[12:17] <- (0b0 || SVG) # SVGPR
372 shape[28:29] <- ew # element-width override
373 shape[21] <- sk # skip 1st dimension
374 # select the mode for updating SVSHAPEs
375 SVSTATE[62] <- mm # set or clear persistence
377 # clear out all SVSHAPEs first
378 SVSHAPE0[0:31] <- [0] * 32
379 SVSHAPE1[0:31] <- [0] * 32
380 SVSHAPE2[0:31] <- [0] * 32
381 SVSHAPE3[0:31] <- [0] * 32
382 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
383 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
387 # activate requested shape
388 if idx = 0 then SVSHAPE0 <- shape
389 if idx = 1 then SVSHAPE1 <- shape
390 if idx = 2 then SVSHAPE2 <- shape
391 if idx = 3 then SVSHAPE3 <- shape
392 SVSTATE[bit*2+32:bit*2+33] <- idx
393 # increment shape index, modulo 4
394 if idx = 3 then idx <- 0
397 # refined SVSHAPE/REMAP update mode
400 if idx = 0 then SVSHAPE0 <- shape
401 if idx = 1 then SVSHAPE1 <- shape
402 if idx = 2 then SVSHAPE2 <- shape
403 if idx = 3 then SVSHAPE3 <- shape
404 SVSTATE[bit*2+32:bit*2+33] <- idx
407 Special Registers Altered:
415 * svshape2 SVo,SVyx,rmm,SVd,sk,mm
419 # based on nearest MAXVL compute other dimension
423 do while d*dim <u ([0]*4 || MVL)
425 # set up template, then copy once location identified
427 shape[30:31] <- 0b00 # mode
428 shape[0:5] <- (0b0 || SVd) # x/ydim
430 shape[18:20] <- 0b000 # ordering xd/yd(/zd)
431 if sk = 0 then shape[6:11] <- 0 # ydim
432 else shape[6:11] <- 0b111111 # ydim max
434 shape[18:20] <- 0b010 # ordering yd/xd(/zd)
435 if sk = 1 then shape[6:11] <- 0 # ydim
436 else shape[6:11] <- d-1 # ydim max
437 # offset (the prime purpose of this instruction)
438 shape[24:27] <- SVo # offset
439 if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
440 else shape[28:29] <- 0b00 # no skipping
441 # select the mode for updating SVSHAPEs
442 SVSTATE[62] <- mm # set or clear persistence
444 # clear out all SVSHAPEs first
445 SVSHAPE0[0:31] <- [0] * 32
446 SVSHAPE1[0:31] <- [0] * 32
447 SVSHAPE2[0:31] <- [0] * 32
448 SVSHAPE3[0:31] <- [0] * 32
449 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
450 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
454 # activate requested shape
455 if idx = 0 then SVSHAPE0 <- shape
456 if idx = 1 then SVSHAPE1 <- shape
457 if idx = 2 then SVSHAPE2 <- shape
458 if idx = 3 then SVSHAPE3 <- shape
459 SVSTATE[bit*2+32:bit*2+33] <- idx
460 # increment shape index, modulo 4
461 if idx = 3 then idx <- 0
464 # refined SVSHAPE/REMAP update mode
467 if idx = 0 then SVSHAPE0 <- shape
468 if idx = 1 then SVSHAPE1 <- shape
469 if idx = 2 then SVSHAPE2 <- shape
470 if idx = 3 then SVSHAPE3 <- shape
471 SVSTATE[bit*2+32:bit*2+33] <- idx
474 Special Registers Altered: