1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
2 <!-- These are not described in book 1 -->
8 * svstep RT,SVi,vf (Rc=0)
9 * svstep. RT,SVi,vf (Rc=1)
13 if SVi[3:4] = 0b11 then
14 # store subvl, pack and unpack in SVSTATE
17 RT <- [0]*62 || SVSTATE[53:54]
19 step <- SVSTATE_NEXT(SVi, vf)
22 Special Registers Altered:
30 * setvl RT,RA,SVi,vf,vs,ms (Rc=0)
31 * setvl. RT,RA,SVi,vf,vs,ms (Rc=1)
36 if (vf & (¬vs) & ¬(ms)) = 1 then
37 step <- SVSTATE_NEXT(SVi, 0b0)
39 GPR(_RT) <- [0]*57 || step
43 if ms = 1 then MVL <- VLimm[0:6]
44 else MVL <- SVSTATE[0:6]
46 if vs = 0 then VL <- SVSTATE[7:13]
48 if (RA) >u 0b1111111 then
51 else VL <- (RA)[57:63]
52 else if _RA != 0 then VL <- (RA)[57:63]
53 else if _RT = 0 then VL <- VLimm[0:6]
54 else if CTR >u 0b1111111 then
58 # limit VL to within MVL
65 GPR(_RT) <- [0]*57 || VL
66 if ((¬vs) & ¬(ms)) = 0 then
67 # set requested Vertical-First mode, clear persist
71 Special Registers Altered:
79 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
83 # registers RA RB RC RT EA/FRS SVSHAPE0-3 indices
89 # enable bit for RA RB RC RT EA/FRS
90 SVSTATE[42:46] <- SVme
91 # persistence bit (applies to more than one instruction)
94 Special Registers Altered:
102 * svshape SVxd,SVyd,SVzd,SVrm,vf
106 # for convenience, VL to be calculated and stored in SVSTATE
108 mscale[0:6] <- 0b0000001 # for scaling MAXVL
109 itercount[0:6] <- [0] * 7
110 SVSTATE[0:31] <- [0] * 32
111 # only overwrite REMAP if "persistence" is zero
112 if (SVSTATE[62] = 0b0) then
113 SVSTATE[32:33] <- 0b00
114 SVSTATE[34:35] <- 0b00
115 SVSTATE[36:37] <- 0b00
116 SVSTATE[38:39] <- 0b00
117 SVSTATE[40:41] <- 0b00
118 SVSTATE[42:46] <- 0b00000
121 # clear out all SVSHAPEs
122 SVSHAPE0[0:31] <- [0] * 32
123 SVSHAPE1[0:31] <- [0] * 32
124 SVSHAPE2[0:31] <- [0] * 32
125 SVSHAPE3[0:31] <- [0] * 32
126 # set schedule up for multiply
127 if (SVrm = 0b0000) then
128 # VL in Matrix Multiply is xd*yd*zd
129 n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
130 vlen[0:6] <- n[14:20]
131 # set up template in SVSHAPE0, then copy to 1-3
132 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
133 SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
134 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
135 SVSHAPE0[28:29] <- 0b11 # skip z
137 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
138 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
139 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
141 SVSHAPE1[18:20] <- 0b001 # permute x,z,y
142 SVSHAPE1[28:29] <- 0b01 # skip z
144 SVSHAPE2[18:20] <- 0b001 # permute x,z,y
145 SVSHAPE2[28:29] <- 0b11 # skip y
146 # set schedule up for FFT butterfly
147 if (SVrm = 0b0001) then
148 # calculate O(N log2 N)
151 if SVxd[4-n] = 0 then
154 n <- ((0b0 || SVxd) + 1) * n
156 # set up template in SVSHAPE0, then copy to 1-3
158 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
159 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D FFT)
160 mscale <- (0b0 || SVzd) + 1
161 SVSHAPE0[30:31] <- 0b01 # Butterfly mode
163 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
164 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
166 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
168 SVSHAPE2[28:29] <- 0b10 # k schedule
169 # set schedule up for (i)DCT Inner butterfly
170 # SVrm Mode 2 (Mode 6 for iDCT) is for pre-calculated coefficients,
171 # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
172 if ((SVrm = 0b0010) | (SVrm = 0b0100) |
173 (SVrm = 0b1010) | (SVrm = 0b1100)) then
174 # calculate O(N log2 N)
177 if SVxd[4-n] = 0 then
180 n <- ((0b0 || SVxd) + 1) * n
182 # set up template in SVSHAPE0, then copy to 1-3
184 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
185 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
186 mscale <- (0b0 || SVzd) + 1
187 if (SVrm = 0b1010) | (SVrm = 0b1100) then
188 SVSHAPE0[30:31] <- 0b11 # iDCT mode
189 SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
191 SVSHAPE0[30:31] <- 0b01 # DCT mode
192 SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
193 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
194 if (SVrm = 0b1100) | (SVrm = 0b0100) then
195 SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
197 SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2
199 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
200 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
201 if (SVrm != 0b0100) & (SVrm != 0b1100) then
202 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
204 SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
205 # for cos coefficient
206 SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
207 SVSHAPE2[12:17] <- 0b000000 # reset costable "striding" to 1
208 if (SVrm != 0b0100) & (SVrm != 0b1100) then
209 SVSHAPE3[28:29] <- 0b11 # size schedule
210 # set schedule up for (i)DCT Outer butterfly
211 if (SVrm = 0b0011) | (SVrm = 0b1011) then
212 # calculate O(N log2 N) number of outer butterfly overlapping adds
216 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
217 itercount[0:6] <- (0b0 || itercount[0:5])
219 if SVxd[4-n] = 0 then
222 count <- (itercount - 0b0000001) * size
223 vlen[0:6] <- vlen + count[7:13]
224 size[0:6] <- (size[1:6] || 0b0)
225 itercount[0:6] <- (0b0 || itercount[0:5])
226 # set up template in SVSHAPE0, then copy to 1-3
228 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
229 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
230 mscale <- (0b0 || SVzd) + 1
231 if (SVrm = 0b1011) then
232 SVSHAPE0[30:31] <- 0b11 # iDCT mode
233 SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
234 SVSHAPE0[21:23] <- 0b101 # "inverse" on outer and inner loop
236 SVSHAPE0[30:31] <- 0b01 # DCT mode
237 SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
238 SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
240 SVSHAPE1[0:31] <- SVSHAPE0[0:31] # j+halfstep schedule
241 SVSHAPE2[0:31] <- SVSHAPE0[0:31] # costable coefficients
243 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
244 # reset costable "striding" to 1
245 SVSHAPE2[12:17] <- 0b000000
246 # set schedule up for DCT COS table generation
247 if (SVrm = 0b0101) | (SVrm = 0b1101) then
248 # calculate O(N log2 N)
250 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
251 itercount[0:6] <- (0b0 || itercount[0:5])
254 if SVxd[4-n] = 0 then
257 vlen[0:6] <- vlen + itercount
258 itercount[0:6] <- (0b0 || itercount[0:5])
259 # set up template in SVSHAPE0, then copy to 1-3
261 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
262 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
263 mscale <- (0b0 || SVzd) + 1
264 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
265 SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
266 if (SVrm = 0b0101) then
267 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop for DCT
269 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
270 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
271 # for cos coefficient
272 SVSHAPE1[28:29] <- 0b10 # ci schedule
273 SVSHAPE2[28:29] <- 0b11 # size schedule
274 # set schedule up for iDCT / DCT inverse of half-swapped ordering
275 if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
276 vlen[0:6] <- (0b00 || SVxd) + 0b0000001
277 # set up template in SVSHAPE0
278 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
279 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding" (2D DCT)
280 mscale <- (0b0 || SVzd) + 1
281 if (SVrm = 0b1110) then
282 SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
283 if (SVrm = 0b1111) then
284 SVSHAPE0[30:31] <- 0b01 # FFT mode
286 SVSHAPE0[30:31] <- 0b11 # DCT mode
287 SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
288 # set schedule up for parallel reduction
289 if (SVrm = 0b0111) then
290 # calculate the total number of operations (brute-force)
292 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
293 step[0:6] <- 0b0000001
295 do while step <u itercount
296 newstep <- step[1:6] || 0b0
298 do while (j+step <u itercount)
302 # VL in Parallel-Reduce is the number of operations
304 # set up template in SVSHAPE0, then copy to SVSHAPE1 (only two shapes are needed)
305 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
306 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim - "striding"
307 mscale <- (0b0 || SVzd) + 1
308 SVSHAPE0[30:31] <- 0b10 # parallel reduce submode
310 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
311 # set up right operand (left operand 28:29 is zero)
312 SVSHAPE1[28:29] <- 0b01 # right operand
313 # set VL, MVL and Vertical-First
314 m[0:12] <- vlen * mscale
315 maxvl[0:6] <- m[6:12]
316 SVSTATE[0:6] <- maxvl # MAXVL
317 SVSTATE[7:13] <- vlen # VL
320 Special Registers Altered:
328 * svindex SVG,rmm,SVd,ew,SVyx,mm,sk
332 # based on nearest MAXVL compute other dimension
336 do while d*dim <u ([0]*4 || MVL)
338 # set up template, then copy once location identified
340 shape[30:31] <- 0b00 # mode
342 shape[18:20] <- 0b110 # indexed xd/yd
343 shape[0:5] <- (0b0 || SVd) # xdim
344 if sk = 0 then shape[6:11] <- 0 # ydim
345 else shape[6:11] <- 0b111111 # ydim max
347 shape[18:20] <- 0b111 # indexed yd/xd
348 if sk = 1 then shape[6:11] <- 0 # ydim
349 else shape[6:11] <- d-1 # ydim max
350 shape[0:5] <- (0b0 || SVd) # ydim
351 shape[12:17] <- (0b0 || SVG) # SVGPR
352 shape[28:29] <- ew # element-width override
353 if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
354 else shape[28:29] <- 0b00 # no skipping
355 # select the mode for updating SVSHAPEs
356 SVSTATE[62] <- mm # set or clear persistence
358 # clear out all SVSHAPEs first
359 SVSHAPE0[0:31] <- [0] * 32
360 SVSHAPE1[0:31] <- [0] * 32
361 SVSHAPE2[0:31] <- [0] * 32
362 SVSHAPE3[0:31] <- [0] * 32
363 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
364 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
368 # activate requested shape
369 if idx = 0 then SVSHAPE0 <- shape
370 if idx = 1 then SVSHAPE1 <- shape
371 if idx = 2 then SVSHAPE2 <- shape
372 if idx = 3 then SVSHAPE3 <- shape
373 SVSTATE[bit*2+32:bit*2+33] <- idx
374 # increment shape index, modulo 4
375 if idx = 3 then idx <- 0
378 # refined SVSHAPE/REMAP update mode
381 if idx = 0 then SVSHAPE0 <- shape
382 if idx = 1 then SVSHAPE1 <- shape
383 if idx = 2 then SVSHAPE2 <- shape
384 if idx = 3 then SVSHAPE3 <- shape
385 SVSTATE[bit*2+32:bit*2+33] <- idx
388 Special Registers Altered:
396 * svshape2 SVo,SVyx,rmm,SVd,sk,mm
400 # based on nearest MAXVL compute other dimension
404 do while d*dim <u ([0]*4 || MVL)
406 # set up template, then copy once location identified
408 shape[30:31] <- 0b00 # mode
409 shape[0:5] <- (0b0 || SVd) # x/ydim
411 shape[18:20] <- 0b000 # ordering xd/yd(/zd)
412 if sk = 0 then shape[6:11] <- 0 # ydim
413 else shape[6:11] <- 0b111111 # ydim max
415 shape[18:20] <- 0b010 # ordering yd/xd(/zd)
416 if sk = 1 then shape[6:11] <- 0 # ydim
417 else shape[6:11] <- d-1 # ydim max
418 # offset (the prime purpose of this instruction)
419 shape[24:27] <- SVo # offset
420 if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
421 else shape[28:29] <- 0b00 # no skipping
422 # select the mode for updating SVSHAPEs
423 SVSTATE[62] <- mm # set or clear persistence
425 # clear out all SVSHAPEs first
426 SVSHAPE0[0:31] <- [0] * 32
427 SVSHAPE1[0:31] <- [0] * 32
428 SVSHAPE2[0:31] <- [0] * 32
429 SVSHAPE3[0:31] <- [0] * 32
430 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
431 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
435 # activate requested shape
436 if idx = 0 then SVSHAPE0 <- shape
437 if idx = 1 then SVSHAPE1 <- shape
438 if idx = 2 then SVSHAPE2 <- shape
439 if idx = 3 then SVSHAPE3 <- shape
440 SVSTATE[bit*2+32:bit*2+33] <- idx
441 # increment shape index, modulo 4
442 if idx = 3 then idx <- 0
445 # refined SVSHAPE/REMAP update mode
448 if idx = 0 then SVSHAPE0 <- shape
449 if idx = 1 then SVSHAPE1 <- shape
450 if idx = 2 then SVSHAPE2 <- shape
451 if idx = 3 then SVSHAPE3 <- shape
452 SVSTATE[bit*2+32:bit*2+33] <- idx
455 Special Registers Altered: