add first stub of svindex pseudocode
[openpower-isa.git] / openpower / isa / simplev.mdwn
1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
2 <!-- These are not described in book 1 -->
3
4 # svstep
5
6 SVL-Form
7
8 * svstep RT,SVi,vf
9 * svstep. RT,SVi,vf
10
11 Pseudo-code:
12
13 step <- SVSTATE_NEXT(SVi, vf) # step is whatever SVSTATE_NEXT returns for (SVi, vf); that helper is defined outside this section
14 RT <- [0]*57 || step # RT receives 57 zero bits followed by step (presumably 7 bits wide, giving 64 in total - confirm)
15
16 Special Registers Altered:
17
18 CR0 (if Rc=1)
19
20 # setvl
21
22 SVL-Form
23
24 * setvl RT,RA,SVi,vf,vs,ms
25 * setvl. RT,RA,SVi,vf,vs,ms
26
27 Pseudo-code:
28
29 if (vf & (¬vs) & ¬(ms)) = 1 then # taken only when vf is set and both vs and ms are clear: step SVSTATE instead of setting VL/MVL
30 step <- SVSTATE_NEXT(SVi, 0b0)
31 if _RT != 0 then # the GPR is only written when the RT field is non-zero
32 GPR(_RT) <- [0]*57 || step
33 else
34 VLimm <- SVi + 1 # VLimm is the SVi immediate plus one
35 # set or get MVL: taken from VLimm when ms=1, otherwise the current SVSTATE[0:6] is kept
36 if ms = 1 then MVL <- VLimm[0:6]
37 else MVL <- SVSTATE[0:6]
38 # set or get VL: vs=0 keeps SVSTATE[7:13]; otherwise the source is picked in the order tested below
39 if vs = 0 then VL <- SVSTATE[7:13]
40 else if _RA != 0 then VL <- (RA)[57:63] # RA field non-zero: bits 57:63 of register RA
41 else if _RT = 0 then VL <- VLimm[0:6] # RA and RT fields both zero: use the immediate
42 else if CTR >u 0b1111111 then VL <- 0b1111111 # CTR (unsigned) above 0b1111111: saturate at 0b1111111
43 else VL <- CTR[57:63] # otherwise bits 57:63 of CTR
44 # limit VL to within MVL
45 if VL >u MVL then
46 VL <- MVL
47 SVSTATE[0:6] <- MVL # MVL is held in SVSTATE[0:6]
48 SVSTATE[7:13] <- VL # VL is held in SVSTATE[7:13]
49 if _RT != 0 then
50 GPR(_RT) <- [0]*57 || VL # 57 zero bits followed by the resulting VL
51 if ((¬vs) & ¬(ms)) = 0 then # i.e. at least one of vs, ms is set
52 # set requested Vertical-First mode (SVSTATE[63]), clear persist (SVSTATE[62])
53 SVSTATE[63] <- vf
54 SVSTATE[62] <- 0b0
55
56 Special Registers Altered:
57
58 CR0 (if Rc=1)
59
60 # svremap
61
62 SVRM-Form
63
64 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
65
66 Pseudo-code:
67
68 # 2-bit SVSHAPE0-3 selectors for registers RA RB RC RT EA/FRS (presumably mi0,mi1,mi2,mo0,mo1 map to them in that order - confirm)
69 SVSTATE[32:33] <- mi0
70 SVSTATE[34:35] <- mi1
71 SVSTATE[36:37] <- mi2
72 SVSTATE[38:39] <- mo0
73 SVSTATE[40:41] <- mo1
74 # enable bits for RA RB RC RT EA/FRS, written as one 5-bit field from SVme
75 SVSTATE[42:46] <- SVme
76 # persistence bit (applies to more than one instruction)
77 SVSTATE[62] <- pst
78
79 Special Registers Altered:
80
81 None
82
83 # svshape
84
85 SVM-Form
86
87 * svshape SVxd,SVyd,SVzd,SVrm,vf
88
89 Pseudo-code:
90
91 # for convenience, VL to be calculated (in vlen) and stored in SVSTATE at the end of the pseudo-code
92 vlen <- [0] * 7
93 itercount[0:6] <- [0] * 7
94 SVSTATE[0:31] <- [0] * 32 # zeroes bits 0:31 of SVSTATE, which include the MVL (0:6) and VL (7:13) fields
95 # only overwrite REMAP if "persistence" (SVSTATE[62]) is zero
96 if (SVSTATE[62] = 0b0) then
97 SVSTATE[32:33] <- 0b00 # 32:41 are the five 2-bit fields svremap loads from mi0,mi1,mi2,mo0,mo1
98 SVSTATE[34:35] <- 0b00
99 SVSTATE[36:37] <- 0b00
100 SVSTATE[38:39] <- 0b00
101 SVSTATE[40:41] <- 0b00
102 SVSTATE[42:46] <- 0b00000 # the 5-bit field svremap loads from SVme
103 SVSTATE[62] <- 0b0 # NOTE(review): a no-op if this line is inside the SVSTATE[62] = 0b0 test; nesting is not visible here - confirm
104 SVSTATE[63] <- 0b0
105 # clear out all SVSHAPEs
106 SVSHAPE0[0:31] <- [0] * 32
107 SVSHAPE1[0:31] <- [0] * 32
108 SVSHAPE2[0:31] <- [0] * 32
109 SVSHAPE3[0:31] <- [0] * 32
110 # set schedule up for multiply (SVrm = 0b0000)
111 if (SVrm = 0b0000) then
112 # VL in Matrix Multiply is xd*yd*zd
113 n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
114 vlen[0:6] <- n[14:20] # keeps 7 bits of the product (the low-order 7 if n is 21 bits wide and bit 0 is the MSB - confirm)
115 # set up template in SVSHAPE0, then copy to 1-3
116 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
117 SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
118 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
119 SVSHAPE0[28:29] <- 0b11 # skip z
120 # copy
121 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
122 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
123 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
124 # set up FRA (SVSHAPE1 overrides on top of the copied template)
125 SVSHAPE1[18:20] <- 0b001 # permute x,z,y
126 SVSHAPE1[28:29] <- 0b01 # skip z - NOTE(review): 0b11 carries the "skip z" label on SVSHAPE0; confirm which label is meant here
127 # FRC (SVSHAPE2 overrides on top of the copied template)
128 SVSHAPE2[18:20] <- 0b001 # permute x,z,y
129 SVSHAPE2[28:29] <- 0b11 # skip y - NOTE(review): same encoding is labelled "skip z" on SVSHAPE0; possibly due to the permute - confirm
130 # set schedule up for FFT butterfly (SVrm = 0b0001)
131 if (SVrm = 0b0001) then
132 # calculate O(N log2 N): n counts the bits of SVxd, from bit 4 towards bit 0, seen before the first zero bit
133 n <- [0] * 3
134 do while n < 5
135 if SVxd[4-n] = 0 then
136 leave
137 n <- n + 1
138 n <- ((0b0 || SVxd) + 1) * n # (SVxd + 1) multiplied by the bit count found above
139 vlen[0:6] <- n[1:7]
140 # set up template in SVSHAPE0, then copy to 1-2 (SVSHAPE3 is not written in this mode)
141 # for FRA and FRT
142 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
143 SVSHAPE0[30:31] <- 0b01 # Butterfly mode
144 # copy
145 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
146 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
147 # set up FRB and FRS
148 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
149 # FRC (coefficients)
150 SVSHAPE2[28:29] <- 0b10 # k schedule
151 # set schedule up for (i)DCT Inner butterfly
152 # SVrm Mode 2 (Mode 10 for iDCT) is for pre-calculated coefficients,
153 # SVrm Mode 4 (Mode 12 for iDCT) is for on-the-fly (Vertical-First Mode)
154 if ((SVrm = 0b0010) | (SVrm = 0b0100) |
155 (SVrm = 0b1010) | (SVrm = 0b1100)) then
156 # calculate O(N log2 N): n counts the bits of SVxd, from bit 4 towards bit 0, seen before the first zero bit
157 n <- [0] * 3
158 do while n < 5
159 if SVxd[4-n] = 0 then
160 leave
161 n <- n + 1
162 n <- ((0b0 || SVxd) + 1) * n # (SVxd + 1) multiplied by the bit count found above
163 vlen[0:6] <- n[1:7]
164 # set up template in SVSHAPE0, then copy to 1-2 (and to 3 except when SVrm is 0b0100 or 0b1100)
165 # set up FRB and FRS
166 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
167 if (SVrm = 0b1010) | (SVrm = 0b1100) then
168 SVSHAPE0[30:31] <- 0b11 # iDCT mode
169 SVSHAPE0[18:20] <- 0b011 # iDCT Inner Butterfly sub-mode
170 else
171 SVSHAPE0[30:31] <- 0b01 # DCT mode
172 SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
173 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
174 if (SVrm = 0b1100) | (SVrm = 0b0100) then
175 SVSHAPE0[6:11] <- 0b000011 # (i)DCT Inner Butterfly mode 4
176 else
177 SVSHAPE0[6:11] <- 0b000001 # (i)DCT Inner Butterfly mode 2
178 # copy
179 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
180 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
181 if (SVrm != 0b0100) & (SVrm != 0b1100) then # SVSHAPE3 is only set up for the pre-calculated-coefficient modes
182 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
183 # for FRA and FRT (applied to SVSHAPE0 after the copies above, so the copies do not inherit it)
184 SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
185 # for cos coefficient
186 SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
187 if (SVrm != 0b0100) & (SVrm != 0b1100) then
188 SVSHAPE3[28:29] <- 0b11 # size schedule
189 # set schedule up for (i)DCT Outer butterfly (SVrm = 0b0011, or 0b1011 for iDCT)
190 if (SVrm = 0b0011) | (SVrm = 0b1011) then
191 # calculate O(N log2 N) number of outer butterfly overlapping adds
192 vlen[0:6] <- [0] * 7
193 n <- 0b000
194 size <- 0b0000001
195 itercount[0:6] <- (0b00 || SVxd) + 0b0000001 # start from SVxd + 1
196 itercount[0:6] <- (0b0 || itercount[0:5]) # prepend a zero and drop bit 6 (a one-bit right shift if bit 0 is the MSB)
197 do while n < 5
198 if SVxd[4-n] = 0 then
199 leave
200 n <- n + 1
201 count <- (itercount - 0b0000001) * size
202 vlen[0:6] <- vlen + count[7:13] # accumulate 7 bits of this round's count into the running VL
203 size[0:6] <- (size[1:6] || 0b0) # drop bit 0 and append a zero (a one-bit left shift if bit 0 is the MSB)
204 itercount[0:6] <- (0b0 || itercount[0:5]) # same one-bit shift as applied before the loop
205 # set up template in SVSHAPE0, then copy to 1-2 (SVSHAPE3 is not written in this mode)
206 # set up FRB and FRS
207 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
208 if (SVrm = 0b1011) then
209 SVSHAPE0[30:31] <- 0b11 # iDCT mode
210 SVSHAPE0[18:20] <- 0b011 # iDCT Outer Butterfly sub-mode
211 SVSHAPE0[21:23] <- 0b101 # "inverse" on outer and inner loop
212 else
213 SVSHAPE0[30:31] <- 0b01 # DCT mode
214 SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
215 SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode - NOTE(review): whether this belongs to the else branch or follows the if/else is not visible here; confirm
216 # copy
217 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
218 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
219 # for FRA and FRT
220 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
221 # set schedule up for DCT COS table generation (SVrm = 0b0101 or 0b1101)
222 if (SVrm = 0b0101) | (SVrm = 0b1101) then
223 # calculate O(N log2 N): VL is the sum of the successively shifted itercount values
224 vlen[0:6] <- [0] * 7
225 itercount[0:6] <- (0b00 || SVxd) + 0b0000001 # start from SVxd + 1
226 itercount[0:6] <- (0b0 || itercount[0:5]) # prepend a zero and drop bit 6 (a one-bit right shift if bit 0 is the MSB)
227 n <- [0] * 3
228 do while n < 5
229 if SVxd[4-n] = 0 then
230 leave
231 n <- n + 1
232 vlen[0:6] <- vlen + itercount
233 itercount[0:6] <- (0b0 || itercount[0:5]) # same one-bit shift as applied before the loop
234 # set up template in SVSHAPE0, then copy to 1-2 (SVSHAPE3 is not written in this mode)
235 # set up FRB and FRS
236 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
237 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
238 SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
239 if (SVrm = 0b0101) then
240 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop for DCT
241 # copy
242 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
243 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
244 # for cos coefficient
245 SVSHAPE1[28:29] <- 0b10 # ci schedule
246 SVSHAPE2[28:29] <- 0b11 # size schedule
247 # set schedule up for iDCT / DCT inverse of half-swapped ordering (SVrm = 0b0110, 0b1110 or 0b1111)
248 if (SVrm = 0b0110) | (SVrm = 0b1110) | (SVrm = 0b1111) then
249 vlen[0:6] <- (0b00 || SVxd) + 0b0000001 # VL is SVxd + 1
250 # set up template in SVSHAPE0 (only SVSHAPE0 is written in this mode)
251 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
252 if (SVrm = 0b1110) then
253 SVSHAPE0[18:20] <- 0b001 # DCT opposite half-swap
254 if (SVrm = 0b1111) then
255 SVSHAPE0[30:31] <- 0b01 # FFT mode
256 else
257 SVSHAPE0[30:31] <- 0b11 # DCT mode - NOTE(review): 0b11 is labelled "iDCT mode" in the (i)DCT butterfly sections; confirm the label
258 SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
259 # set VL, MVL and Vertical-First: both length fields receive the same computed vlen
260 SVSTATE[0:6] <- vlen # MVL field (setvl stores MVL in SVSTATE[0:6])
261 SVSTATE[7:13] <- vlen # VL field (setvl stores VL in SVSTATE[7:13])
262 SVSTATE[63] <- vf # Vertical-First bit taken from the vf operand
263
264 Special Registers Altered:
265
266 None
267
268 # svindex
269
270 SVI-Form
271
272 * svindex RS,rmm,SVd,ew,yz,mr,sk
273
274 Pseudo-code:
275
276 # VL in Matrix Multiply is xd*yd*zd - NOTE(review): carried over from svshape; SVxd/SVyd/SVzd are not among the svindex operands (RS,rmm,SVd,ew,yz,mr,sk)
277 n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd) # NOTE(review): n is not used anywhere below in this stub
278 # set up template, then copy once location identified (NOTE(review): the copy of shape into an SVSHAPE is not written yet)
279 shape <- [0]*32
280 shape[30:31] <- 0b00 # mode
281 if yz = 1 then shape[18:20] <- 0b110 # indexed xd/yd
282 else shape[18:20] <- 0b111 # indexed yd/xd
283 shape[0:5] <- (0b0 || SVxd) # xdim
284 shape[6:11] <- (0b0 || SVyd) # ydim
285 shape[12:17] <- (0b0 || SVzd || 0b0) # SVGPR - NOTE(review): if SVzd is 5 bits as in svshape this is 7 bits into a 6-bit slice; confirm the intended source (RS?) and width
286 shape[28:29] <- ew # element-width override - NOTE(review): both branches of the sk test below overwrite shape[28:29], so this write has no effect; confirm the intended field
287 if sk = 1 then shape[28:29] <- 0b01 # skip 1st dimension
288 else shape[28:29] <- 0b00 # no skipping
289 # select the mode for updating SVSHAPEs - NOTE(review): "mm" is not an operand; the operand list has rmm and mr - confirm which is meant
290 if mm = 0 then
291 # clear out all SVSHAPEs first
292 SVSHAPE0[0:31] <- [0] * 32
293 SVSHAPE1[0:31] <- [0] * 32
294 SVSHAPE2[0:31] <- [0] * 32
295 SVSHAPE3[0:31] <- [0] * 32
296 SVSTATE[42:46] <- rmm # rmm exactly REMAP.SVme
297 SVSTATE[62] <- 0 # persistence bit cleared
298
299 Special Registers Altered:
300
301 None
302