fix setvl. not setting CR0 properly
[openpower-isa.git] / openpower / isa / simplev.mdwn
1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
2 <!-- These are not described in book 1 -->
3
4 # svstep
5
6 SVL-Form
7
8 * svstep RT,SVi,vf
9 * svstep. RT,SVi,vf
10
11 Pseudo-code:
12
13 step <- SVSTATE_NEXT(SVi, vf) # svstep: take the result of the SVSTATE_NEXT helper (defined outside this section) for SVi and vf
14 RT <- [0]*57 || step # step is written to RT behind 57 zero bits (presumably step is 7 bits wide, giving 64 bits - confirm). NOTE(review): no CR0 update appears here although CR0 is listed as altered when Rc=1 - confirm it is handled elsewhere
15
16 Special Registers Altered:
17
18 CR0 (if Rc=1)
19
20 # setvl
21
22 SVL-Form
23
24 * setvl RT,RA,SVi,vf,vs,ms
25 * setvl. RT,RA,SVi,vf,vs,ms
26
27 Pseudo-code:
28
29 if (vf & (¬vs) & ¬(ms)) = 1 then # setvl, step-only path: taken when vf=1 and both vs and ms are 0 (neither VL nor MVL is being set)
30 step <- SVSTATE_NEXT(SVi, 0b0) # helper defined outside this section; the second argument is the constant 0b0, not vf
31 if _RT != 0 then # the GPR result is only written when _RT is non-zero
32 GPR(_RT) <- [0]*57 || step # step behind 57 zero bits
33 if Rc = 1 then
34 if step = 0 then c <- 0b001 # step is zero
35 else c <- 0b010 # step is non-zero
36 CR[32:35] <- c || XER[SO] # 3-bit c followed by the SO bit; presumably CR[32:35] is CR0, the only register listed as altered - confirm
37 else
38 VLimm <- SVi + 1 # the immediate is encoded as (value - 1)
39 # set or get MVL: taken from the immediate when ms=1, otherwise the current SVSTATE[0:6] is kept
40 if ms = 1 then MVL <- VLimm[0:6]
41 else MVL <- SVSTATE[0:6]
42 # set or get VL: source priority is current SVSTATE (vs=0), then RA, then the immediate, then CTR
43 if vs = 0 then VL <- SVSTATE[7:13] # not setting VL: keep the current value
44 else if _RA != 0 then
45 if (RA) >u 0b1111111 then VL <- 0b1111111 # saturate at 127 (unsigned compare)
46 else VL <- (RA)[57:63] # low 7 bits of RA
47 else if _RT = 0 then VL <- VLimm[0:6] # _RA and _RT both zero: use the immediate
48 else if CTR >u 0b1111111 then VL <- 0b1111111 # _RA zero, _RT non-zero: VL comes from CTR, saturated at 127
49 else VL <- CTR[57:63] # low 7 bits of CTR
50 # limit VL to within MVL
51 if VL >u MVL then
52 VL <- MVL
53 SVSTATE[0:6] <- MVL # write MVL back to SVSTATE
54 SVSTATE[7:13] <- VL # write the (possibly clamped) VL back to SVSTATE
55 if _RT != 0 then # the GPR result is only written when _RT is non-zero
56 GPR(_RT) <- [0]*57 || VL # VL behind 57 zero bits
57 if ((¬vs) & ¬(ms)) = 0 then # true when at least one of vs, ms is 1
58 # set requested Vertical-First mode, clear persist
59 SVSTATE[63] <- vf
60 SVSTATE[62] <- 0b0
61 if Rc = 1 then
62 if VL = 0 then c <- 0b001 # resulting VL is zero
63 else c <- 0b010 # resulting VL is non-zero
64 CR[32:35] <- c || XER[SO] # same CR field encoding as in the step-only path above
65
66 Special Registers Altered:
67
68 CR0 (if Rc=1)
69
70 # svremap
71
72 SVRM-Form
73
74 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
75
76 Pseudo-code:
77
78 # svremap: copy the instruction's operand fields into SVSTATE. First the SVSHAPE0-3 indices for registers RA RB RC RT EA/FRS
79 SVSTATE[32:33] <- mi0 # 2-bit index, presumably for the first register in the list above - confirm order
80 SVSTATE[34:35] <- mi1
81 SVSTATE[36:37] <- mi2
82 SVSTATE[38:39] <- mo0
83 SVSTATE[40:41] <- mo1
84 # enable bit for RA RB RC RT EA/FRS (5 bits, one per register)
85 SVSTATE[42:46] <- SVme
86 # persistence bit (applies to more than one instruction)
87 SVSTATE[62] <- pst
88
89 Special Registers Altered:
90
91 None
92
93 # svshape
94
95 SVM-Form
96
97 * svshape SVxd,SVyd,SVzd,SVrm,vf
98
99 Pseudo-code:
100
101 # svshape: fills SVSHAPE0-3 according to SVrm; for convenience, VL is also calculated (in vlen) and stored in SVSTATE. NOTE(review): indentation is not visible in this rendering, so block nesting must be read from the original file
102 vlen <- [0] * 7 # 7-bit computed vector length, written to SVSTATE at the end
103 itercount[0:6] <- [0] * 7 # 7-bit scratch counter used by the outer-butterfly and COS-table modes
104 SVSTATE[0:31] <- [0] * 32 # zero the first 32 bits of SVSTATE; bits [0:6] and [7:13] are rewritten from vlen at the end
105 # only overwrite REMAP if "persistence" is zero
106 if (SVSTATE[62] = 0b0) then
107 SVSTATE[32:33] <- 0b00 # bits 32:41 and 42:46 are the same fields that svremap loads from mi0..mo1 and SVme
108 SVSTATE[34:35] <- 0b00
109 SVSTATE[36:37] <- 0b00
110 SVSTATE[38:39] <- 0b00
111 SVSTATE[40:41] <- 0b00
112 SVSTATE[42:46] <- 0b00000
113 SVSTATE[62] <- 0b0 # persistence bit
114 SVSTATE[63] <- 0b0 # Vertical-First bit (set from vf at the end)
115 # clear out all SVSHAPEs
116 SVSHAPE0[0:31] <- [0] * 32
117 SVSHAPE1[0:31] <- [0] * 32
118 SVSHAPE2[0:31] <- [0] * 32
119 SVSHAPE3[0:31] <- [0] * 32
120 # set schedule up for multiply
121 if (SVrm = 0b0000) then
122 # VL in Matrix Multiply is xd*yd*zd
123 n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd) # each dimension is zero-extended by two bits before multiplying
124 vlen[0:6] <- n[14:20] # presumably the low 7 bits of a 21-bit product - confirm operand widths
125 # set up template in SVSHAPE0, then copy to 1-3
126 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
127 SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
128 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
129 SVSHAPE0[28:29] <- 0b11 # skip z
130 # copy
131 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
132 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
133 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
134 # set up FRA
135 SVSHAPE1[18:20] <- 0b001 # permute x,z,y
136 SVSHAPE1[28:29] <- 0b01 # skip z
137 # FRC
138 SVSHAPE2[18:20] <- 0b001 # permute x,z,y
139 SVSHAPE2[28:29] <- 0b11 # skip y -- NOTE(review): 0b11 is labelled "skip z" for SVSHAPE0 and 0b01 is also labelled "skip z" for SVSHAPE1; confirm which labels are correct
140 # set schedule up for FFT butterfly
141 if (SVrm = 0b0001) then
142 # calculate O(N log2 N)
143 n <- [0] * 3 # 3-bit counter
144 do while n < 5 # examines SVxd[4], SVxd[3], ... and leaves at the first zero bit
145 if SVxd[4-n] = 0 then
146 leave
147 n <- n + 1
148 n <- ((0b0 || SVxd) + 1) * n # (SVxd + 1) multiplied by the count obtained above
149 vlen[0:6] <- n[1:7] # takes 7 bits of the product; result width depends on the multiply semantics - confirm
150 # set up template in SVSHAPE0, then copy to 1-3
151 # for FRA and FRT
152 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
153 SVSHAPE0[30:31] <- 0b01 # Butterfly mode
154 # copy
155 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
156 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
157 # set up FRB and FRS
158 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
159 # FRC (coefficients)
160 SVSHAPE2[28:29] <- 0b10 # k schedule
161 # set schedule up for (i)DCT Inner butterfly
162 # SVrm Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
163 # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
164 if ((SVrm = 0b0010) | (SVrm = 0b0100) |
165 (SVrm = 0b1010) | (SVrm = 0b1100)) then
166 # calculate O(N log2 N)
167 n <- [0] * 3 # 3-bit counter
168 do while n < 5 # examines SVxd[4], SVxd[3], ... and leaves at the first zero bit
169 if SVxd[4-n] = 0 then
170 leave
171 n <- n + 1
172 n <- ((0b0 || SVxd) + 1) * n # (SVxd + 1) multiplied by the count obtained above
173 vlen[0:6] <- n[1:7] # same VL calculation as the FFT butterfly mode
174 # set up template in SVSHAPE0, then copy to 1-3
175 # set up FRB and FRS
176 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
177 if (SVrm = 0b1010) | (SVrm = 0b1100) then
178 SVSHAPE0[30:31] <- 0b11 # iDCT mode
179 SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
180 else
181 SVSHAPE0[30:31] <- 0b01 # DCT mode
182 SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
183 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
184 if (SVrm = 0b1100) | (SVrm = 0b0100) then
185 SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
186 else
187 SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2
188 # copy
189 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
190 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
191 if (SVrm != 0b0100) & (SVrm != 0b1100) then # SVSHAPE3 is only used by the pre-calculated-coefficient modes (2 and 10)
192 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
193 # for FRA and FRT
194 SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
195 # for cos coefficient
196 SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
197 if (SVrm != 0b0100) & (SVrm != 0b1100) then
198 SVSHAPE3[28:29] <- 0b11 # size schedule
199 # set schedule up for (i)DCT Outer butterfly
200 if (SVrm = 0b0011) | (SVrm = 0b1011) then
201 # calculate O(N log2 N) number of outer butterfly overlapping adds
202 vlen[0:6] <- [0] * 7
203 n <- 0b000
204 size <- 0b0000001
205 itercount[0:6] <- (0b00 || SVxd) + 0b0000001 # SVxd + 1 as a 7-bit value
206 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit (assuming MSB0 bit numbering), i.e. halve
207 do while n < 5 # examines SVxd[4], SVxd[3], ... and leaves at the first zero bit
208 if SVxd[4-n] = 0 then
209 leave
210 n <- n + 1
211 count <- (itercount - 0b0000001) * size
212 vlen[0:6] <- vlen + count[7:13] # accumulate 7 bits of count; presumably its low 7 bits - confirm product width
213 size[0:6] <- (size[1:6] || 0b0) # shift left by one bit (assuming MSB0 bit numbering), i.e. double
214 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit, i.e. halve
215 # set up template in SVSHAPE0, then copy to 1-3
216 # set up FRB and FRS
217 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
218 if (SVrm = 0b1011) then
219 SVSHAPE0[30:31] <- 0b11 # iDCT mode
220 SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
221 SVSHAPE0[21:23] <- 0b101 # "inverse" on outer and inner loop
222 else
223 SVSHAPE0[30:31] <- 0b01 # DCT mode
224 SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
225 SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
226 # copy
227 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
228 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
229 # for FRA and FRT
230 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
231 # set schedule up for DCT COS table generation
232 if (SVrm = 0b0101) | (SVrm = 0b1101) then
233 # calculate O(N log2 N)
234 vlen[0:6] <- [0] * 7
235 itercount[0:6] <- (0b00 || SVxd) + 0b0000001 # SVxd + 1 as a 7-bit value
236 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit (assuming MSB0 bit numbering), i.e. halve
237 n <- [0] * 3
238 do while n < 5 # examines SVxd[4], SVxd[3], ... and leaves at the first zero bit
239 if SVxd[4-n] = 0 then
240 leave
241 n <- n + 1
242 vlen[0:6] <- vlen + itercount # accumulate the current itercount into VL
243 itercount[0:6] <- (0b0 || itercount[0:5]) # shift right by one bit, i.e. halve
244 # set up template in SVSHAPE0, then copy to 1-3
245 # set up FRB and FRS
246 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
247 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
248 SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
249 if (SVrm = 0b0101) then
250 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop for DCT
251 # copy
252 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
253 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
254 # for cos coefficient
255 SVSHAPE1[28:29] <- 0b10 # ci schedule
256 SVSHAPE2[28:29] <- 0b11 # size schedule
257 # set schedule up for iDCT / DCT inverse of half-swapped ordering
258 if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
259 vlen[0:6] <- (0b00 || SVxd) + 0b0000001 # VL is simply SVxd + 1 in these modes
260 # set up template in SVSHAPE0
261 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
262 if (SVrm = 0b1110) then
263 SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
264 if (SVrm = 0b1111) then
265 SVSHAPE0[30:31] <- 0b01 # FFT mode
266 else
267 SVSHAPE0[30:31] <- 0b11 # DCT mode -- NOTE(review): 0b11 is labelled "iDCT mode" in the butterfly sections above and 0b01 "DCT mode"; confirm this label
268 SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
269 # set VL, MVL and Vertical-First: both SVSTATE[0:6] and SVSTATE[7:13] receive the same computed vlen
270 SVSTATE[0:6] <- vlen
271 SVSTATE[7:13] <- vlen
272 SVSTATE[63] <- vf
273
274 Special Registers Altered:
275
276 None
277
278 # svindex
279
280 SVI-Form
281
282 * svindex SVG,rmm,SVd,ew,yx,mr,sk
283
284 Pseudo-code:
285
286 # svindex: builds one 32-bit shape template and installs it into the SVSHAPEs selected by rmm. First, based on nearest MAXVL compute other dimension
287 MVL <- SVSTATE[0:6]
288 d <- [0] * 6 # 6-bit counter
289 dim <- SVd+1
290 do while d*dim <u ([0]*4 || MVL) # d is incremented until d*dim is no longer below MVL (unsigned compare, MVL zero-extended by 4 bits)
291 d <- d + 1
292 # set up template, then copy once location identified
293 shape <- [0]*32
294 shape[30:31] <- 0b00 # mode
295 if yx = 0 then
296 shape[18:20] <- 0b110 # indexed xd/yd
297 shape[0:5] <- (0b0 || SVd) # xdim
298 if sk = 0 then shape[6:11] <- 0 # ydim
299 else shape[6:11] <- 0b111111 # ydim max
300 else
301 shape[18:20] <- 0b111 # indexed yd/xd
302 if sk = 1 then shape[6:11] <- 0 # ydim
303 else shape[6:11] <- d-1 # ydim max
304 shape[0:5] <- (0b0 || SVd) # ydim -- NOTE(review): the same bits shape[0:5] are labelled "xdim" in the yx = 0 branch; confirm the intended label
305 shape[12:17] <- (0b0 || SVG) # SVGPR
306 shape[28:29] <- ew # element-width override -- NOTE(review): both arms of the next if/else assign shape[28:29] again, so this value is always overwritten; confirm the intended destination bits
307 if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
308 else shape[28:29] <- 0b00 # no skipping
309 # select the mode for updating SVSHAPEs
310 SVSTATE[62] <- mm # set or clear persistence -- NOTE(review): "mm" does not appear in the operand list given for svindex (which has "mr"); confirm the field name
311 if mm = 0 then
312 # clear out all SVSHAPEs first
313 SVSHAPE0[0:31] <- [0] * 32
314 SVSHAPE1[0:31] <- [0] * 32
315 SVSHAPE2[0:31] <- [0] * 32
316 SVSHAPE3[0:31] <- [0] * 32
317 SVSTATE[32:41] <- [0] * 10 # clear REMAP.mi/o
318 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
319 idx <- 0 # next SVSHAPE number to hand out
320 for bit = 0 to 4 # one iteration per bit of the 5-bit rmm mask
321 if rmm[4-bit] then
322 # activate requested shape
323 if idx = 0 then SVSHAPE0 <- shape
324 if idx = 1 then SVSHAPE1 <- shape
325 if idx = 2 then SVSHAPE2 <- shape
326 if idx = 3 then SVSHAPE3 <- shape
327 SVSTATE[bit*2+32:bit*2+33] <- idx # record the SVSHAPE number in the 2-bit index field for this bit position
328 # increment shape index, modulo 4
329 if idx = 3 then idx <- 0
330 else idx <- idx + 1
331 else
332 # refined SVSHAPE/REMAP update mode: rmm is split into a 3-bit position and a 2-bit SVSHAPE number
333 bit <- rmm[0:2]
334 idx <- rmm[3:4]
335 if idx = 0 then SVSHAPE0 <- shape
336 if idx = 1 then SVSHAPE1 <- shape
337 if idx = 2 then SVSHAPE2 <- shape
338 if idx = 3 then SVSHAPE3 <- shape
339 SVSTATE[bit*2+32:bit*2+33] <- idx # record the SVSHAPE number in the 2-bit index field selected by bit
340 SVSTATE[46-bit] <- 1 # set the single matching enable bit within SVSTATE[42:46]; the other enable bits are not written here
341
342 Special Registers Altered:
343
344 None
345