ff8c71ab9d42b3f56d63ad7a1347a59a9016ca37
[openpower-isa.git] / openpower / isa / simplev.mdwn
1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
2 <!-- These are not described in book 1 -->
3
4 # svstep
5
6 SVL-Form
7
8 * svstep RT,SVi,vf
9 * svstep. RT,SVi,vf
10
11 Pseudo-code:
12
13 step <- SVSTATE_NEXT(SVi, vf) # result of SVSTATE_NEXT for operands SVi and vf
14 RT <- [0]*57 || step # RT receives step with 57 zero bits prepended
15
16 Special Registers Altered:
17
18 CR0 (if Rc=1)
19
20 # setvl
21
22 SVL-Form
23
24 * setvl RT,RA,SVi,vf,vs,ms
25 * setvl. RT,RA,SVi,vf,vs,ms
26
27 Pseudo-code:
28
29 overflow <- 0b0 # set to 0b1 below whenever the requested VL is clamped
30 if (vf & (¬vs) & ¬(ms)) = 1 then
31 step <- SVSTATE_NEXT(SVi, 0b0) # vf set with vs and ms both clear: use SVSTATE_NEXT
32 if _RT != 0 then
33 GPR(_RT) <- [0]*57 || step
34 else
35 VLimm <- SVi + 1
36 # set or get MVL
37 if ms = 1 then MVL <- VLimm[0:6]
38 else MVL <- SVSTATE[0:6]
39 # set or get VL: from SVSTATE (vs=0), else (RA), else VLimm (when _RT = 0), else CTR
40 if vs = 0 then VL <- SVSTATE[7:13]
41 else if _RA != 0 then
42 if (RA) >u 0b1111111 then
43 VL <- 0b1111111
44 overflow <- 0b1
45 else VL <- (RA)[57:63]
47 else if _RT = 0 then VL <- VLimm[0:6]
48 else if CTR >u 0b1111111 then
49 VL <- 0b1111111
50 overflow <- 0b1
51 else VL <- CTR[57:63]
52 # limit VL to within MVL
53 if VL >u MVL then
54 overflow <- 0b1
55 VL <- MVL
56 SVSTATE[0:6] <- MVL
57 SVSTATE[7:13] <- VL
58 if _RT != 0 then
59 GPR(_RT) <- [0]*57 || VL
60 if ((¬vs) & ¬(ms)) = 0 then
61 # set requested Vertical-First mode, clear persist
62 SVSTATE[63] <- vf
63 SVSTATE[62] <- 0b0
64
65 Special Registers Altered:
66
67 CR0 (if Rc=1)
68
69 # svremap
70
71 SVRM-Form
72
73 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
74
75 Pseudo-code:
76
77 # 2-bit SVSHAPE0-3 index for each of the registers RA RB RC RT EA/FRS
78 SVSTATE[32:33] <- mi0
79 SVSTATE[34:35] <- mi1
80 SVSTATE[36:37] <- mi2
81 SVSTATE[38:39] <- mo0
82 SVSTATE[40:41] <- mo1
83 # enable bits (5 bits, taken directly from SVme) for RA RB RC RT EA/FRS
84 SVSTATE[42:46] <- SVme
85 # persistence bit (applies to more than one instruction)
86 SVSTATE[62] <- pst
87
88 Special Registers Altered:
89
90 None
91
92 # svshape
93
94 SVM-Form
95
96 * svshape SVxd,SVyd,SVzd,SVrm,vf
97
98 Pseudo-code:
99
100 # for convenience, VL to be calculated and stored in SVSTATE
101 vlen <- [0] * 7 # 7-bit length; copied into SVSTATE[0:6] and SVSTATE[7:13] at the end
102 itercount[0:6] <- [0] * 7
103 SVSTATE[0:31] <- [0] * 32 # zero SVSTATE bits 0 to 31
104 # only overwrite REMAP if "persistence" is zero
105 if (SVSTATE[62] = 0b0) then
106 SVSTATE[32:33] <- 0b00
107 SVSTATE[34:35] <- 0b00
108 SVSTATE[36:37] <- 0b00
109 SVSTATE[38:39] <- 0b00
110 SVSTATE[40:41] <- 0b00
111 SVSTATE[42:46] <- 0b00000 # same bits that svremap loads from SVme
112 SVSTATE[62] <- 0b0 # persistence bit
113 SVSTATE[63] <- 0b0 # Vertical-First bit
114 # clear out all SVSHAPEs
115 SVSHAPE0[0:31] <- [0] * 32
116 SVSHAPE1[0:31] <- [0] * 32
117 SVSHAPE2[0:31] <- [0] * 32
118 SVSHAPE3[0:31] <- [0] * 32
119 # set schedule up for multiply
120 if (SVrm = 0b0000) then
121 # VL in Matrix Multiply is xd*yd*zd
122 n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
123 vlen[0:6] <- n[14:20] # presumably the low 7 bits of a 21-bit product - TODO confirm width
124 # set up template in SVSHAPE0, then copy to 1-3
125 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
126 SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
127 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
128 SVSHAPE0[28:29] <- 0b11 # skip z
129 # copy
130 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
131 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
132 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
133 # set up FRA
134 SVSHAPE1[18:20] <- 0b001 # permute x,z,y
135 SVSHAPE1[28:29] <- 0b01 # skip z - NOTE(review): 0b11 on SVSHAPE0 carries the same label; confirm
136 # FRC
137 SVSHAPE2[18:20] <- 0b001 # permute x,z,y
138 SVSHAPE2[28:29] <- 0b11 # skip y - NOTE(review): 0b11 is labelled "skip z" on SVSHAPE0; confirm
139 # set schedule up for FFT butterfly
140 if (SVrm = 0b0001) then
141 # calculate O(N log2 N): count the consecutive 1 bits of SVxd from SVxd[4] downwards in index
142 n <- [0] * 3
143 do while n < 5
144 if SVxd[4-n] = 0 then
145 leave
146 n <- n + 1
147 n <- ((0b0 || SVxd) + 1) * n # (SVxd + 1) multiplied by the bit count found above
148 vlen[0:6] <- n[1:7]
149 # set up template in SVSHAPE0, then copy to 1-2
150 # for FRA and FRT
151 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
152 SVSHAPE0[30:31] <- 0b01 # Butterfly mode
153 # copy
154 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
155 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
156 # set up FRB and FRS
157 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
158 # FRC (coefficients)
159 SVSHAPE2[28:29] <- 0b10 # k schedule
160 # set schedule up for (i)DCT Inner butterfly
161 # SVrm Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
162 # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
163 if ((SVrm = 0b0010) | (SVrm = 0b0100) |
164 (SVrm = 0b1010) | (SVrm = 0b1100)) then
165 # calculate O(N log2 N): count the consecutive 1 bits of SVxd from SVxd[4] downwards in index
166 n <- [0] * 3
167 do while n < 5
168 if SVxd[4-n] = 0 then
169 leave
170 n <- n + 1
171 n <- ((0b0 || SVxd) + 1) * n # (SVxd + 1) multiplied by the bit count found above
172 vlen[0:6] <- n[1:7]
173 # set up template in SVSHAPE0, then copy to 1-3 (SVSHAPE3 only for modes 2 and 10)
174 # set up FRB and FRS
175 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
176 if (SVrm = 0b1010) | (SVrm = 0b1100) then
177 SVSHAPE0[30:31] <- 0b11 # iDCT mode
178 SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
179 else
180 SVSHAPE0[30:31] <- 0b01 # DCT mode
181 SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
182 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
183 if (SVrm = 0b1100) | (SVrm = 0b0100) then
184 SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
185 else
186 SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2
187 # copy
188 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
189 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
190 if (SVrm != 0b0100) & (SVrm != 0b1100) then
191 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
192 # for FRA and FRT
193 SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
194 # for cos coefficient
195 SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
196 if (SVrm != 0b0100) & (SVrm != 0b1100) then
197 SVSHAPE3[28:29] <- 0b11 # size schedule
198 # set schedule up for (i)DCT Outer butterfly
199 if (SVrm = 0b0011) | (SVrm = 0b1011) then
200 # calculate O(N log2 N) number of outer butterfly overlapping adds
201 vlen[0:6] <- [0] * 7
202 n <- 0b000
203 size <- 0b0000001
204 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
205 itercount[0:6] <- (0b0 || itercount[0:5]) # shift itercount right by one bit
206 do while n < 5
207 if SVxd[4-n] = 0 then
208 leave
209 n <- n + 1
210 count <- (itercount - 0b0000001) * size
211 vlen[0:6] <- vlen + count[7:13]
212 size[0:6] <- (size[1:6] || 0b0) # shift size left by one bit
213 itercount[0:6] <- (0b0 || itercount[0:5]) # shift itercount right by one bit
214 # set up template in SVSHAPE0, then copy to 1-2
215 # set up FRB and FRS
216 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
217 if (SVrm = 0b1011) then
218 SVSHAPE0[30:31] <- 0b11 # iDCT mode
219 SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
220 SVSHAPE0[21:23] <- 0b101 # "inverse" on outer and inner loop
221 else
222 SVSHAPE0[30:31] <- 0b01 # DCT mode
223 SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
224 SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
225 # copy
226 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
227 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
228 # for FRA and FRT
229 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
230 # set schedule up for DCT COS table generation
231 if (SVrm = 0b0101) | (SVrm = 0b1101) then
232 # calculate O(N log2 N)
233 vlen[0:6] <- [0] * 7
234 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
235 itercount[0:6] <- (0b0 || itercount[0:5]) # shift itercount right by one bit
236 n <- [0] * 3
237 do while n < 5
238 if SVxd[4-n] = 0 then
239 leave
240 n <- n + 1
241 vlen[0:6] <- vlen + itercount # accumulate the current itercount into vlen
242 itercount[0:6] <- (0b0 || itercount[0:5]) # shift itercount right by one bit
243 # set up template in SVSHAPE0, then copy to 1-2
244 # set up FRB and FRS
245 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
246 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
247 SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
248 if (SVrm = 0b0101) then
249 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop for DCT
250 # copy
251 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
252 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
253 # for cos coefficient
254 SVSHAPE1[28:29] <- 0b10 # ci schedule
255 SVSHAPE2[28:29] <- 0b11 # size schedule
256 # set schedule up for iDCT / DCT inverse of half-swapped ordering
257 if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
258 vlen[0:6] <- (0b00 || SVxd) + 0b0000001 # length is SVxd + 1
259 # set up template in SVSHAPE0
260 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
261 if (SVrm = 0b1110) then
262 SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
263 if (SVrm = 0b1111) then
264 SVSHAPE0[30:31] <- 0b01 # FFT mode
265 else
266 SVSHAPE0[30:31] <- 0b11 # DCT mode - NOTE(review): the inner butterfly labels 0b11 "iDCT mode"; confirm
267 SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
268 # set MVL, VL and Vertical-First
269 SVSTATE[0:6] <- vlen # MVL field (setvl reads MVL from SVSTATE[0:6])
270 SVSTATE[7:13] <- vlen # VL field (setvl reads VL from SVSTATE[7:13])
271 SVSTATE[63] <- vf
272
273 Special Registers Altered:
274
275 None
276
277 # svindex
278
279 SVI-Form
280
281 * svindex SVG,rmm,SVd,ew,yx,mm,sk
282
283 Pseudo-code:
284
285 # based on nearest MAXVL compute other dimension
286 MVL <- SVSTATE[0:6]
287 d <- [0] * 6
288 dim <- SVd+1
289 do while d*dim <u ([0]*4 || MVL)
290 d <- d + 1 # loop ends at the first d for which d*dim is not below MVL
291 # set up template, then copy once location identified
292 shape <- [0]*32
293 shape[30:31] <- 0b00 # mode
294 if yx = 0 then
295 shape[18:20] <- 0b110 # indexed xd/yd
296 shape[0:5] <- (0b0 || SVd) # xdim
297 if sk = 0 then shape[6:11] <- 0 # ydim
298 else shape[6:11] <- 0b111111 # ydim max
299 else
300 shape[18:20] <- 0b111 # indexed yd/xd
301 if sk = 1 then shape[6:11] <- 0 # ydim
302 else shape[6:11] <- d-1 # ydim max
303 shape[0:5] <- (0b0 || SVd) # ydim
304 shape[12:17] <- (0b0 || SVG) # SVGPR
305 shape[28:29] <- ew # element-width override
306 shape[21] <- sk # skip 1st dimension; kept out of bits 28:29 so ew above is not overwritten
308 # select the mode for updating SVSHAPEs
309 SVSTATE[62] <- mm # set or clear persistence
310 if mm = 0 then
311 # clear out all SVSHAPEs first
312 SVSHAPE0[0:31] <- [0] * 32
313 SVSHAPE1[0:31] <- [0] * 32
314 SVSHAPE2[0:31] <- [0] * 32
315 SVSHAPE3[0:31] <- [0] * 32
316 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
317 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
318 idx <- 0
319 for bit = 0 to 4
320 if rmm[4-bit] then
321 # activate requested shape
322 if idx = 0 then SVSHAPE0 <- shape
323 if idx = 1 then SVSHAPE1 <- shape
324 if idx = 2 then SVSHAPE2 <- shape
325 if idx = 3 then SVSHAPE3 <- shape
326 SVSTATE[bit*2+32:bit*2+33] <- idx # record the SVSHAPE number in this slot's 2-bit index field
327 # increment shape index, modulo 4
328 if idx = 3 then idx <- 0
329 else idx <- idx + 1
330 else
331 # refined SVSHAPE/REMAP update mode: rmm[0:2] picks the slot, rmm[3:4] picks the SVSHAPE
332 bit <- rmm[0:2]
333 idx <- rmm[3:4]
334 if idx = 0 then SVSHAPE0 <- shape
335 if idx = 1 then SVSHAPE1 <- shape
336 if idx = 2 then SVSHAPE2 <- shape
337 if idx = 3 then SVSHAPE3 <- shape
338 SVSTATE[bit*2+32:bit*2+33] <- idx
339 SVSTATE[46-bit] <- 1 # set the matching enable bit within SVSTATE[42:46]
340
341 Special Registers Altered:
342
343 None
344
345 # svshape2
346
347 SVM2-Form
348
349 * svshape2 offs,inv,yx,rmm,SVd,sk,mm
350
351 Pseudo-code:
352
353 # based on nearest MAXVL compute other dimension
354 MVL <- SVSTATE[0:6]
355 d <- [0] * 6
356 dim <- SVd+1
357 do while d*dim <u ([0]*4 || MVL)
358 d <- d + 1 # loop ends at the first d for which d*dim is not below MVL
359 # set up template, then copy once location identified
360 shape <- [0]*32
361 shape[30:31] <- 0b00 # mode
362 shape[0:5] <- (0b0 || SVd) # x/ydim
363 if yx = 0 then
364 shape[18:20] <- 0b000 # ordering xd/yd(/zd)
365 if sk = 0 then shape[6:11] <- 0 # ydim
366 else shape[6:11] <- 0b111111 # ydim max
367 shape[21:23] <- (0b00 || inv) # "inverse" on xdim
368 else
369 shape[18:20] <- 0b010 # ordering yd/xd(/zd)
370 if sk = 1 then shape[6:11] <- 0 # ydim
371 else shape[6:11] <- d-1 # ydim max
372 shape[21:23] <- (0b0 || inv || 0b0) # "inverse" ydim
373 # offset (the prime purpose of this instruction)
374 shape[24:27] <- (0b0 || offs) # offset extended 1-bit
375 if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
376 else shape[28:29] <- 0b00 # no skipping
377 # select the mode for updating SVSHAPEs
378 SVSTATE[62] <- mm # set or clear persistence
379 if mm = 0 then
380 # clear out all SVSHAPEs first
381 SVSHAPE0[0:31] <- [0] * 32
382 SVSHAPE1[0:31] <- [0] * 32
383 SVSHAPE2[0:31] <- [0] * 32
384 SVSHAPE3[0:31] <- [0] * 32
385 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
386 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
387 idx <- 0
388 for bit = 0 to 4
389 if rmm[4-bit] then
390 # activate requested shape
391 if idx = 0 then SVSHAPE0 <- shape
392 if idx = 1 then SVSHAPE1 <- shape
393 if idx = 2 then SVSHAPE2 <- shape
394 if idx = 3 then SVSHAPE3 <- shape
395 SVSTATE[bit*2+32:bit*2+33] <- idx # record the SVSHAPE number in this slot's 2-bit index field
396 # increment shape index, modulo 4
397 if idx = 3 then idx <- 0
398 else idx <- idx + 1
399 else
400 # refined SVSHAPE/REMAP update mode: rmm[0:2] picks the slot, rmm[3:4] picks the SVSHAPE
401 bit <- rmm[0:2]
402 idx <- rmm[3:4]
403 if idx = 0 then SVSHAPE0 <- shape
404 if idx = 1 then SVSHAPE1 <- shape
405 if idx = 2 then SVSHAPE2 <- shape
406 if idx = 3 then SVSHAPE3 <- shape
407 SVSTATE[bit*2+32:bit*2+33] <- idx
408 SVSTATE[46-bit] <- 1 # set the matching enable bit within SVSTATE[42:46]
409
410 Special Registers Altered:
411
412 None
413