use widths.get(dw/sw) and test empty/non-empty after.
[openpower-isa.git] / openpower / isa / simplev.mdwn
1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
2 <!-- These are not described in book 1 -->
3
4 # svstep
5
6 SVL-Form
7
8 * svstep RT,SVi,vf (Rc=0)
9 * svstep. RT,SVi,vf (Rc=1)
10
11 Pseudo-code:
12
13 if SVi[3:4] != 0b11 then
14 # usual case: SVSTATE_NEXT supplies the value returned in RT
15 step <- SVSTATE_NEXT(SVi, vf)
16 RT <- [0]*57 || step
17 else
18 # SVi[3:4] = 0b11: latch SVi[5:6] into SVSTATE[53:54], return them in RT
19 SVSTATE[53:54] <- SVi[5:6]
20 RT <- [0]*62 || SVSTATE[53:54]
21
22 Special Registers Altered:
23
24 CR0 (if Rc=1)
25
26 # setvl
27
28 SVL-Form
29
30 * setvl RT,RA,SVi,vf,vs,ms (Rc=0)
31 * setvl. RT,RA,SVi,vf,vs,ms (Rc=1)
32
33 Pseudo-code:
34
35 overflow <- 0b0 # raised whenever the requested VL has to be clamped
36 if (vf & (¬vs) & ¬(ms)) = 1 then
37 step <- SVSTATE_NEXT(SVi, 0b0) # vf=1, vs=0, ms=0: step only, VL and MVL are not written
38 if _RT != 0 then
39 GPR(_RT) <- [0]*57 || step
40 else
41 VLimm <- SVi + 1
42 # set or get MVL: from the immediate when ms=1, otherwise kept from SVSTATE
43 if ms = 1 then MVL <- VLimm[0:6]
44 else MVL <- SVSTATE[0:6]
45 # set or get VL: keep SVSTATE's VL when vs=0, otherwise take it from RA
46 # (if _RA != 0), else VLimm (if _RT = 0), else CTR; RA and CTR saturate at 127
47 if vs = 0 then VL <- SVSTATE[7:13]
48 else if _RA != 0 then
49 if (RA) >u 0b1111111 then
50 VL <- 0b1111111
51 overflow <- 0b1
52 else VL <- (RA)[57:63]
53 else if _RT = 0 then VL <- VLimm[0:6]
54 else if CTR >u 0b1111111 then
55 VL <- 0b1111111
56 overflow <- 0b1
57 else VL <- CTR[57:63]
58 # limit VL to within MVL
59 if VL >u MVL then
60 overflow <- 0b1
61 VL <- MVL
62 SVSTATE[0:6] <- MVL
63 SVSTATE[7:13] <- VL
64 if _RT != 0 then
65 GPR(_RT) <- [0]*57 || VL # return the VL actually set
66 if ((¬vs) & ¬(ms)) = 0 then
67 # VL or MVL was set: record requested Vertical-First mode, clear persist
68 SVSTATE[63] <- vf
69 SVSTATE[62] <- 0b0
70
71 Special Registers Altered:
72
73 CR0 (if Rc=1)
74
75 # svremap
76
77 SVRM-Form
78
79 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
80
81 Pseudo-code:
82
83 # persistence bit: REMAP stays in force for more than one instruction
84 SVSTATE[62] <- pst
85 # one enable bit per register: RA RB RC RT EA/FRS
86 SVSTATE[42:46] <- SVme
87 # SVSHAPE0-3 selector per register, output fields first, then input fields
88 SVSTATE[40:41] <- mo1
89 SVSTATE[38:39] <- mo0
90 SVSTATE[36:37] <- mi2
91 SVSTATE[34:35] <- mi1
92 SVSTATE[32:33] <- mi0
93
94 Special Registers Altered:
95
96 None
97
98 # svshape
99
100 SVM-Form
101
102 * svshape SVxd,SVyd,SVzd,SVrm,vf
103
104 Pseudo-code:
105
106 # vlen accumulates the VL for the selected SVrm mode; it is written to both the MVL and VL fields of SVSTATE at the end
107 vlen <- [0] * 7
108 itercount[0:6] <- [0] * 7
109 SVSTATE[0:31] <- [0] * 32 # clear SVSTATE bits 0-31 (MVL and VL included)
110 # only overwrite REMAP if "persistence" is zero
111 if (SVSTATE[62] = 0b0) then
112 SVSTATE[32:33] <- 0b00 # mi0
113 SVSTATE[34:35] <- 0b00 # mi1
114 SVSTATE[36:37] <- 0b00 # mi2
115 SVSTATE[38:39] <- 0b00 # mo0
116 SVSTATE[40:41] <- 0b00 # mo1
117 SVSTATE[42:46] <- 0b00000 # SVme enable bits
118 SVSTATE[62] <- 0b0 # persistence
119 SVSTATE[63] <- 0b0 # Vertical-First; set again from vf at the end
120 # clear out all SVSHAPEs
121 SVSHAPE0[0:31] <- [0] * 32
122 SVSHAPE1[0:31] <- [0] * 32
123 SVSHAPE2[0:31] <- [0] * 32
124 SVSHAPE3[0:31] <- [0] * 32
125 # set schedule up for multiply
126 if (SVrm = 0b0000) then
127 # VL in Matrix Multiply is xd*yd*zd (NOTE(review): the other modes use SVxd+1 - confirm that no +1 is intended here)
128 n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
129 vlen[0:6] <- n[14:20]
130 # set up template in SVSHAPE0, then copy to 1-3
131 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
132 SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
133 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
134 SVSHAPE0[28:29] <- 0b11 # skip z
135 # copy
136 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
137 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
138 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
139 # set up FRA
140 SVSHAPE1[18:20] <- 0b001 # permute x,z,y
141 SVSHAPE1[28:29] <- 0b01 # skip z (NOTE(review): 0b11 is labelled both "skip z" and "skip y" in this block - confirm the labels)
142 # FRC
143 SVSHAPE2[18:20] <- 0b001 # permute x,z,y
144 SVSHAPE2[28:29] <- 0b11 # skip y
145 # set schedule up for FFT butterfly
146 if (SVrm = 0b0001) then
147 # calculate O(N log2 N): n counts consecutive 1 bits of SVxd starting at SVxd[4] (at most 5), then VL is taken from (SVxd+1) * n
148 n <- [0] * 3
149 do while n < 5
150 if SVxd[4-n] = 0 then
151 leave
152 n <- n + 1
153 n <- ((0b0 || SVxd) + 1) * n
154 vlen[0:6] <- n[1:7]
155 # set up template in SVSHAPE0, then copy to 1-2
156 # for FRA and FRT
157 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
158 SVSHAPE0[30:31] <- 0b01 # Butterfly mode
159 # copy
160 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
161 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
162 # set up FRB and FRS
163 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
164 # FRC (coefficients)
165 SVSHAPE2[28:29] <- 0b10 # k schedule
166 # set schedule up for (i)DCT Inner butterfly
167 # SVrm Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
168 # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
169 if ((SVrm = 0b0010) | (SVrm = 0b0100) |
170 (SVrm = 0b1010) | (SVrm = 0b1100)) then
171 # calculate O(N log2 N), same computation as the FFT butterfly case
172 n <- [0] * 3
173 do while n < 5
174 if SVxd[4-n] = 0 then
175 leave
176 n <- n + 1
177 n <- ((0b0 || SVxd) + 1) * n
178 vlen[0:6] <- n[1:7]
179 # set up template in SVSHAPE0, then copy to 1-2 (and to 3 except in mode 4 / mode 12)
180 # set up FRB and FRS
181 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
182 if (SVrm = 0b1010) | (SVrm = 0b1100) then
183 SVSHAPE0[30:31] <- 0b11 # iDCT mode
184 SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
185 else
186 SVSHAPE0[30:31] <- 0b01 # DCT mode
187 SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
188 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
189 if (SVrm = 0b1100) | (SVrm = 0b0100) then
190 SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
191 else
192 SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2
193 # copy
194 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
195 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
196 if (SVrm != 0b0100) & (SVrm != 0b1100) then
197 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
198 # for FRA and FRT
199 SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
200 # for cos coefficient
201 SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
202 if (SVrm != 0b0100) & (SVrm != 0b1100) then
203 SVSHAPE3[28:29] <- 0b11 # size schedule
204 # set schedule up for (i)DCT Outer butterfly
205 if (SVrm = 0b0011) | (SVrm = 0b1011) then
206 # calculate O(N log2 N) number of outer butterfly overlapping adds
207 vlen[0:6] <- [0] * 7
208 n <- 0b000
209 size <- 0b0000001
210 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
211 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit
212 do while n < 5
213 if SVxd[4-n] = 0 then
214 leave
215 n <- n + 1
216 count <- (itercount - 0b0000001) * size
217 vlen[0:6] <- vlen + count[7:13]
218 size[0:6] <- (size[1:6] || 0b0) # shift left by one bit
219 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit
220 # set up template in SVSHAPE0, then copy to 1-2
221 # set up FRB and FRS
222 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
223 if (SVrm = 0b1011) then
224 SVSHAPE0[30:31] <- 0b11 # iDCT mode
225 SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
226 SVSHAPE0[21:23] <- 0b101 # "inverse" on outer and inner loop
227 else
228 SVSHAPE0[30:31] <- 0b01 # DCT mode
229 SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
230 SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
231 # copy
232 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
233 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
234 # for FRA and FRT
235 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
236 # set schedule up for DCT COS table generation
237 if (SVrm = 0b0101) | (SVrm = 0b1101) then
238 # calculate O(N log2 N): sum the repeatedly right-shifted (SVxd+1) once per counted 1 bit of SVxd
239 vlen[0:6] <- [0] * 7
240 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
241 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit
242 n <- [0] * 3
243 do while n < 5
244 if SVxd[4-n] = 0 then
245 leave
246 n <- n + 1
247 vlen[0:6] <- vlen + itercount
248 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit
249 # set up template in SVSHAPE0, then copy to 1-2
250 # set up FRB and FRS
251 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
252 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
253 SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
254 if (SVrm = 0b0101) then
255 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop for DCT
256 # copy
257 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
258 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
259 # for cos coefficient
260 SVSHAPE1[28:29] <- 0b10 # ci schedule
261 SVSHAPE2[28:29] <- 0b11 # size schedule
262 # set schedule up for iDCT / DCT inverse of half-swapped ordering
263 if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
264 vlen[0:6] <- (0b00 || SVxd) + 0b0000001
265 # set up template in SVSHAPE0 (the only SVSHAPE written in this mode)
266 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
267 if (SVrm = 0b1110) then
268 SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
269 if (SVrm = 0b1111) then
270 SVSHAPE0[30:31] <- 0b01 # FFT mode
271 else
272 SVSHAPE0[30:31] <- 0b11 # DCT mode (NOTE(review): 0b11 is labelled "iDCT mode" in the butterfly cases above - confirm)
273 SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
274 # set schedule up for parallel reduction
275 if (SVrm = 0b0111) then
276 # calculate the total number of operations (brute-force): i counts inner-loop iterations while step doubles
277 vlen[0:6] <- [0] * 7
278 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
279 step[0:6] <- 0b0000001
280 i[0:6] <- 0b0000000
281 do while step <u itercount
282 newstep <- step[1:6] || 0b0
283 j[0:6] <- 0b0000000
284 do while (j+step <u itercount)
285 j <- j + newstep
286 i <- i + 1
287 step <- newstep
288 # VL in Parallel-Reduce is the number of operations
289 vlen[0:6] <- i
290 # set up template in SVSHAPE0, then copy to 1. only 2 needed
291 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
292 SVSHAPE0[30:31] <- 0b10 # parallel reduce submode
293 # copy
294 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
295 # set up right operand (left operand 28:29 is zero)
296 SVSHAPE1[28:29] <- 0b01 # right operand
297 # set VL, MVL and Vertical-First
298 SVSTATE[0:6] <- vlen
299 SVSTATE[7:13] <- vlen
300 SVSTATE[63] <- vf
301
302 Special Registers Altered:
303
304 None
305
306 # svindex
307
308 SVI-Form
309
310 * svindex SVG,rmm,SVd,ew,SVyx,mm,sk
311
312 Pseudo-code:
313
314 # based on nearest MAXVL compute other dimension: d ends as the smallest count for which d*(SVd+1) is not below MVL
315 MVL <- SVSTATE[0:6]
316 d <- [0] * 6
317 dim <- SVd+1
318 do while d*dim <u ([0]*4 || MVL)
319 d <- d + 1
320 # set up template, then copy once location identified
321 shape <- [0]*32
322 shape[30:31] <- 0b00 # mode
323 if SVyx = 0 then
324 shape[18:20] <- 0b110 # indexed xd/yd
325 shape[0:5] <- (0b0 || SVd) # xdim
326 if sk = 0 then shape[6:11] <- 0 # ydim
327 else shape[6:11] <- 0b111111 # ydim max
328 else
329 shape[18:20] <- 0b111 # indexed yd/xd
330 if sk = 1 then shape[6:11] <- 0 # ydim
331 else shape[6:11] <- d-1 # ydim max
332 shape[0:5] <- (0b0 || SVd) # ydim
333 shape[12:17] <- (0b0 || SVG) # SVGPR
334 shape[28:29] <- ew # element-width override
335 # sk goes in bit 21 for Indexed shapes; writing it to 28:29 would overwrite ew
336 shape[21] <- sk # skip 1st dimension
337 # select the mode for updating SVSHAPEs
338 SVSTATE[62] <- mm # set or clear persistence
339 if mm = 0 then
340 # clear out all SVSHAPEs first
341 SVSHAPE0[0:31] <- [0] * 32
342 SVSHAPE1[0:31] <- [0] * 32
343 SVSHAPE2[0:31] <- [0] * 32
344 SVSHAPE3[0:31] <- [0] * 32
345 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
346 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
347 idx <- 0 # next SVSHAPE to hand out
348 for bit = 0 to 4
349 if rmm[4-bit] then
350 # activate requested shape
351 if idx = 0 then SVSHAPE0 <- shape
352 if idx = 1 then SVSHAPE1 <- shape
353 if idx = 2 then SVSHAPE2 <- shape
354 if idx = 3 then SVSHAPE3 <- shape
355 SVSTATE[bit*2+32:bit*2+33] <- idx # record which SVSHAPE this 2-bit REMAP field uses
356 # increment shape index, modulo 4
357 if idx = 3 then idx <- 0
358 else idx <- idx + 1
359 else
360 # refined SVSHAPE/REMAP update mode: rmm[0:2] picks the REMAP field, rmm[3:4] picks the SVSHAPE
361 bit <- rmm[0:2]
362 idx <- rmm[3:4]
363 if idx = 0 then SVSHAPE0 <- shape
364 if idx = 1 then SVSHAPE1 <- shape
365 if idx = 2 then SVSHAPE2 <- shape
366 if idx = 3 then SVSHAPE3 <- shape
367 SVSTATE[bit*2+32:bit*2+33] <- idx
368 SVSTATE[46-bit] <- 1 # set the matching enable bit, leaving the others untouched
369
370 Special Registers Altered:
371
372 None
373
374 # svshape2
375
376 SVM2-Form
377
378 * svshape2 SVo,SVyx,rmm,SVd,sk,mm
379
380 Pseudo-code:
381
382 # based on nearest MAXVL compute other dimension: d ends as the smallest count for which d*(SVd+1) is not below MVL
383 MVL <- SVSTATE[0:6]
384 d <- [0] * 6
385 dim <- SVd+1
386 do while d*dim <u ([0]*4 || MVL)
387 d <- d + 1
388 # set up template, then copy once location identified
389 shape <- [0]*32
390 shape[30:31] <- 0b00 # mode
391 shape[0:5] <- (0b0 || SVd) # x/ydim
392 if SVyx = 0 then
393 shape[18:20] <- 0b000 # ordering xd/yd(/zd)
394 if sk = 0 then shape[6:11] <- 0 # ydim
395 else shape[6:11] <- 0b111111 # ydim max
396 else
397 shape[18:20] <- 0b010 # ordering yd/xd(/zd)
398 if sk = 1 then shape[6:11] <- 0 # ydim
399 else shape[6:11] <- d-1 # ydim max
400 # offset (the prime purpose of this instruction)
401 shape[24:27] <- SVo # offset
402 if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
403 else shape[28:29] <- 0b00 # no skipping
404 # select the mode for updating SVSHAPEs
405 SVSTATE[62] <- mm # set or clear persistence
406 if mm = 0 then
407 # clear out all SVSHAPEs first
408 SVSHAPE0[0:31] <- [0] * 32
409 SVSHAPE1[0:31] <- [0] * 32
410 SVSHAPE2[0:31] <- [0] * 32
411 SVSHAPE3[0:31] <- [0] * 32
412 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
413 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
414 idx <- 0 # next SVSHAPE to hand out
415 for bit = 0 to 4
416 if rmm[4-bit] then
417 # activate requested shape
418 if idx = 0 then SVSHAPE0 <- shape
419 if idx = 1 then SVSHAPE1 <- shape
420 if idx = 2 then SVSHAPE2 <- shape
421 if idx = 3 then SVSHAPE3 <- shape
422 SVSTATE[bit*2+32:bit*2+33] <- idx # record which SVSHAPE this 2-bit REMAP field uses
423 # increment shape index, modulo 4
424 if idx = 3 then idx <- 0
425 else idx <- idx + 1
426 else
427 # refined SVSHAPE/REMAP update mode: rmm[0:2] picks the REMAP field, rmm[3:4] picks the SVSHAPE
428 bit <- rmm[0:2]
429 idx <- rmm[3:4]
430 if idx = 0 then SVSHAPE0 <- shape
431 if idx = 1 then SVSHAPE1 <- shape
432 if idx = 2 then SVSHAPE2 <- shape
433 if idx = 3 then SVSHAPE3 <- shape
434 SVSTATE[bit*2+32:bit*2+33] <- idx
435 SVSTATE[46-bit] <- 1 # set the matching enable bit, leaving the others untouched
436
437 Special Registers Altered:
438
439 None
440