arch-gcn3: make read2st64_b32 write proper registers
src/arch/gcn3/insts/instructions.cc (gem5.git)
1 /*
2 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Anthony Gutierrez
34 */
35
36 #include "arch/gcn3/insts/instructions.hh"
37
38 #include <cmath>
39
40 #include "arch/gcn3/insts/inst_util.hh"
41 #include "debug/GCN3.hh"
42 #include "debug/GPUSync.hh"
43 #include "gpu-compute/shader.hh"
44
45 namespace Gcn3ISA
46 {
47
48 Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt)
49 : Inst_SOP2(iFmt, "s_add_u32")
50 {
51 setFlag(ALU);
52 } // Inst_SOP2__S_ADD_U32
53
54 Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32()
55 {
56 } // ~Inst_SOP2__S_ADD_U32
57
58 // D.u = S0.u + S1.u;
59 // SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned
60 // overflow/carry-out.
61 void
62 Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
63 {
64 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
65 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
66 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
67 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
68
69 src0.read();
70 src1.read();
71
72 sdst = src0.rawData() + src1.rawData();
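// Widening both operands to 64 bits makes the carry-out visible, e.g.,
// 0xffffffff + 0x1 = 0x100000000, so SCC = 1.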
73 scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
74 >= 0x100000000ULL ? 1 : 0;
75
76 sdst.write();
77 scc.write();
78 }
79
80 Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt)
81 : Inst_SOP2(iFmt, "s_sub_u32")
82 {
83 setFlag(ALU);
84 } // Inst_SOP2__S_SUB_U32
85
86 Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32()
87 {
88 } // ~Inst_SOP2__S_SUB_U32
89
90 // D.u = S0.u - S1.u;
91 // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out.
92 void
93 Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
94 {
95 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
96 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
97 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
98 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
99
100 src0.read();
101 src1.read();
102
103 sdst = src0.rawData() - src1.rawData();
104 scc = (src1.rawData() > src0.rawData()) ? 1 : 0;
105
106 sdst.write();
107 scc.write();
108 }
109
110 Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt)
111 : Inst_SOP2(iFmt, "s_add_i32")
112 {
113 setFlag(ALU);
114 } // Inst_SOP2__S_ADD_I32
115
116 Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32()
117 {
118 } // ~Inst_SOP2__S_ADD_I32
119
120 // D.i = S0.i + S1.i;
121 // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
122 // overflow.
123 void
124 Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst)
125 {
126 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
127 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
128 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
129 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
130
131 src0.read();
132 src1.read();
133
134 sdst = src0.rawData() + src1.rawData();
135 scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
136 && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
137 ? 1 : 0;
138
139 sdst.write();
140 scc.write();
141 }
142
143 Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt)
144 : Inst_SOP2(iFmt, "s_sub_i32")
145 {
146 setFlag(ALU);
147 } // Inst_SOP2__S_SUB_I32
148
149 Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32()
150 {
151 } // ~Inst_SOP2__S_SUB_I32
152
153 // D.i = S0.i - S1.i;
154 // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
155 // overflow.
156 void
157 Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst)
158 {
159 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
160 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
161 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
162 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
163
164 src0.read();
165 src1.read();
166
167 sdst = src0.rawData() - src1.rawData();
168 scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
169 && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
170
171 sdst.write();
172 scc.write();
173 }
174
175 Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt)
176 : Inst_SOP2(iFmt, "s_addc_u32")
177 {
178 setFlag(ALU);
179 } // Inst_SOP2__S_ADDC_U32
180
181 Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32()
182 {
183 } // ~Inst_SOP2__S_ADDC_U32
184
185 // D.u = S0.u + S1.u + SCC;
186 // SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned
187 // overflow.
188 void
189 Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
190 {
191 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
192 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
193 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
194 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
195
196 src0.read();
197 src1.read();
198 scc.read();
199
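// SCC serves as the carry-in here; paired with s_add_u32 (which sets SCC
// to the carry-out) this implements a 64-bit add.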
200 sdst = src0.rawData() + src1.rawData() + scc.rawData();
201 scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
202 + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;
203
204 sdst.write();
205 scc.write();
206 }
207
208 Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt)
209 : Inst_SOP2(iFmt, "s_subb_u32")
210 {
211 setFlag(ALU);
212 } // Inst_SOP2__S_SUBB_U32
213
214 Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32()
215 {
216 } // ~Inst_SOP2__S_SUBB_U32
217
218 // D.u = S0.u - S1.u - SCC;
219 // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
220 void
221 Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
222 {
223 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
224 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
225 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
226 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
227
228 src0.read();
229 src1.read();
230 scc.read();
231
232 sdst = src0.rawData() - src1.rawData() - scc.rawData();
233 scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0;
234
235 sdst.write();
236 scc.write();
237 }
238
239 Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt)
240 : Inst_SOP2(iFmt, "s_min_i32")
241 {
242 setFlag(ALU);
243 } // Inst_SOP2__S_MIN_I32
244
245 Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32()
246 {
247 } // ~Inst_SOP2__S_MIN_I32
248
249 // D.i = (S0.i < S1.i) ? S0.i : S1.i;
250 // SCC = 1 if S0 is chosen as the minimum value.
251 void
252 Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
253 {
254 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
255 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
256 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
257 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
258
259 src0.read();
260 src1.read();
261
262 sdst = std::min(src0.rawData(), src1.rawData());
263 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
264
265 sdst.write();
266 scc.write();
267 }
268
269 Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt)
270 : Inst_SOP2(iFmt, "s_min_u32")
271 {
272 setFlag(ALU);
273 } // Inst_SOP2__S_MIN_U32
274
275 Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32()
276 {
277 } // ~Inst_SOP2__S_MIN_U32
278
279 // D.u = (S0.u < S1.u) ? S0.u : S1.u;
280 // SCC = 1 if S0 is chosen as the minimum value.
281 void
282 Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
283 {
284 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
285 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
286 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
287 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
288
289 src0.read();
290 src1.read();
291
292 sdst = std::min(src0.rawData(), src1.rawData());
293 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
294
295 sdst.write();
296 scc.write();
297 }
298
299 Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt)
300 : Inst_SOP2(iFmt, "s_max_i32")
301 {
302 setFlag(ALU);
303 } // Inst_SOP2__S_MAX_I32
304
305 Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32()
306 {
307 } // ~Inst_SOP2__S_MAX_I32
308
309 // D.i = (S0.i > S1.i) ? S0.i : S1.i;
310 // SCC = 1 if S0 is chosen as the maximum value.
311 void
312 Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
313 {
314 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
315 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
316 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
317 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
318
319 src0.read();
320 src1.read();
321
322 sdst = std::max(src0.rawData(), src1.rawData());
323 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
324
325 sdst.write();
326 scc.write();
327 }
328
329 Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt)
330 : Inst_SOP2(iFmt, "s_max_u32")
331 {
332 setFlag(ALU);
333 } // Inst_SOP2__S_MAX_U32
334
335 Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32()
336 {
337 } // ~Inst_SOP2__S_MAX_U32
338
339 // D.u = (S0.u > S1.u) ? S0.u : S1.u;
340 // SCC = 1 if S0 is chosen as the maximum value.
341 void
342 Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
343 {
344 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
345 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
346 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
347 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
348
349 src0.read();
350 src1.read();
351
352 sdst = std::max(src0.rawData(), src1.rawData());
353 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
354
355 sdst.write();
356 scc.write();
357 }
358
359 Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt)
360 : Inst_SOP2(iFmt, "s_cselect_b32")
361 {
362 setFlag(ALU);
363 } // Inst_SOP2__S_CSELECT_B32
364
365 Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32()
366 {
367 } // ~Inst_SOP2__S_CSELECT_B32
368
369 // D.u = SCC ? S0.u : S1.u (conditional select).
370 void
371 Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst)
372 {
373 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
374 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
375 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
376 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
377
378 src0.read();
379 src1.read();
380 scc.read();
381
382 sdst = scc.rawData() ? src0.rawData() : src1.rawData();
383
384 sdst.write();
385 }
386
387 Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt)
388 : Inst_SOP2(iFmt, "s_cselect_b64")
389 {
390 setFlag(ALU);
391 } // Inst_SOP2__S_CSELECT_B64
392
393 Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64()
394 {
395 } // ~Inst_SOP2__S_CSELECT_B64
396
397 // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
398 void
399 Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst)
400 {
401 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
402 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
403 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
404 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
405
406 src0.read();
407 src1.read();
408 scc.read();
409
410 sdst = scc.rawData() ? src0.rawData() : src1.rawData();
411
412 sdst.write();
413 }
414
415 Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt)
416 : Inst_SOP2(iFmt, "s_and_b32")
417 {
418 setFlag(ALU);
419 } // Inst_SOP2__S_AND_B32
420
421 Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32()
422 {
423 } // ~Inst_SOP2__S_AND_B32
424
425 // D.u = S0.u & S1.u;
426 // SCC = 1 if result is non-zero.
427 void
428 Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
429 {
430 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
431 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
432 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
433 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
434
435 src0.read();
436 src1.read();
437
438 sdst = src0.rawData() & src1.rawData();
439 scc = sdst.rawData() ? 1 : 0;
440
441 sdst.write();
442 scc.write();
443 }
444
445 Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
446 : Inst_SOP2(iFmt, "s_and_b64")
447 {
448 setFlag(ALU);
449 } // Inst_SOP2__S_AND_B64
450
451 Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
452 {
453 } // ~Inst_SOP2__S_AND_B64
454
455 // D.u64 = S0.u64 & S1.u64;
456 // SCC = 1 if result is non-zero.
457 void
458 Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
459 {
460 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
461 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
462 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
463 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
464
465 src0.read();
466 src1.read();
467
468 sdst = src0.rawData() & src1.rawData();
469 scc = sdst.rawData() ? 1 : 0;
470
471 sdst.write();
472 scc.write();
473 }
474
475 Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
476 : Inst_SOP2(iFmt, "s_or_b32")
477 {
478 setFlag(ALU);
479 } // Inst_SOP2__S_OR_B32
480
481 Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
482 {
483 } // ~Inst_SOP2__S_OR_B32
484
485 // D.u = S0.u | S1.u;
486 // SCC = 1 if result is non-zero.
487 void
488 Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
489 {
490 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
491 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
492 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
493 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
494
495 src0.read();
496 src1.read();
497
498 sdst = src0.rawData() | src1.rawData();
499 scc = sdst.rawData() ? 1 : 0;
500
501 sdst.write();
502 scc.write();
503 }
504
505 Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
506 : Inst_SOP2(iFmt, "s_or_b64")
507 {
508 setFlag(ALU);
509 } // Inst_SOP2__S_OR_B64
510
511 Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
512 {
513 } // ~Inst_SOP2__S_OR_B64
514
515 // D.u64 = S0.u64 | S1.u64;
516 // SCC = 1 if result is non-zero.
517 void
518 Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
519 {
520 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
521 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
522 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
523 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
524
525 src0.read();
526 src1.read();
527
528 sdst = src0.rawData() | src1.rawData();
529 scc = sdst.rawData() ? 1 : 0;
530
531 sdst.write();
532 scc.write();
533 }
534
535 Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
536 : Inst_SOP2(iFmt, "s_xor_b32")
537 {
538 setFlag(ALU);
539 } // Inst_SOP2__S_XOR_B32
540
541 Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
542 {
543 } // ~Inst_SOP2__S_XOR_B32
544
545 // D.u = S0.u ^ S1.u;
546 // SCC = 1 if result is non-zero.
547 void
548 Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
549 {
550 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
551 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
552 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
553 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
554
555 src0.read();
556 src1.read();
557
558 sdst = src0.rawData() ^ src1.rawData();
559 scc = sdst.rawData() ? 1 : 0;
560
561 sdst.write();
562 scc.write();
563 }
564
565 Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
566 : Inst_SOP2(iFmt, "s_xor_b64")
567 {
568 setFlag(ALU);
569 } // Inst_SOP2__S_XOR_B64
570
571 Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
572 {
573 } // ~Inst_SOP2__S_XOR_B64
574
575 // D.u64 = S0.u64 ^ S1.u64;
576 // SCC = 1 if result is non-zero.
577 void
578 Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
579 {
580 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
581 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
582 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
583 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
584
585 src0.read();
586 src1.read();
587
588 sdst = src0.rawData() ^ src1.rawData();
589 scc = sdst.rawData() ? 1 : 0;
590
591 sdst.write();
592 scc.write();
593 }
594
595 Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
596 : Inst_SOP2(iFmt, "s_andn2_b32")
597 {
598 setFlag(ALU);
599 } // Inst_SOP2__S_ANDN2_B32
600
601 Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
602 {
603 } // ~Inst_SOP2__S_ANDN2_B32
604
605 // D.u = S0.u & ~S1.u;
606 // SCC = 1 if result is non-zero.
607 void
608 Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
609 {
610 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
611 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
612 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
613 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
614
615 src0.read();
616 src1.read();
617
618 sdst = src0.rawData() &~ src1.rawData();
619 scc = sdst.rawData() ? 1 : 0;
620
621 sdst.write();
622 scc.write();
623 }
624
625 Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
626 : Inst_SOP2(iFmt, "s_andn2_b64")
627 {
628 setFlag(ALU);
629 } // Inst_SOP2__S_ANDN2_B64
630
631 Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
632 {
633 } // ~Inst_SOP2__S_ANDN2_B64
634
635 // D.u64 = S0.u64 & ~S1.u64;
636 // SCC = 1 if result is non-zero.
637 void
638 Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
639 {
640 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
641 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
642 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
643 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
644
645 src0.read();
646 src1.read();
647
648 sdst = src0.rawData() &~ src1.rawData();
649 scc = sdst.rawData() ? 1 : 0;
650
651 sdst.write();
652 scc.write();
653 }
654
655 Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
656 : Inst_SOP2(iFmt, "s_orn2_b32")
657 {
658 setFlag(ALU);
659 } // Inst_SOP2__S_ORN2_B32
660
661 Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
662 {
663 } // ~Inst_SOP2__S_ORN2_B32
664
665 // D.u = S0.u | ~S1.u;
666 // SCC = 1 if result is non-zero.
667 void
668 Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
669 {
670 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
671 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
672 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
673 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
674
675 src0.read();
676 src1.read();
677
678 sdst = src0.rawData() |~ src1.rawData();
679 scc = sdst.rawData() ? 1 : 0;
680
681 sdst.write();
682 scc.write();
683 }
684
685 Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
686 : Inst_SOP2(iFmt, "s_orn2_b64")
687 {
688 setFlag(ALU);
689 } // Inst_SOP2__S_ORN2_B64
690
691 Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
692 {
693 } // ~Inst_SOP2__S_ORN2_B64
694
695 // D.u64 = S0.u64 | ~S1.u64;
696 // SCC = 1 if result is non-zero.
697 void
698 Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
699 {
700 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
701 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
702 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
703 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
704
705 src0.read();
706 src1.read();
707
708 sdst = src0.rawData() |~ src1.rawData();
709 scc = sdst.rawData() ? 1 : 0;
710
711 sdst.write();
712 scc.write();
713 }
714
715 Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
716 : Inst_SOP2(iFmt, "s_nand_b32")
717 {
718 setFlag(ALU);
719 } // Inst_SOP2__S_NAND_B32
720
721 Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
722 {
723 } // ~Inst_SOP2__S_NAND_B32
724
725 // D.u = ~(S0.u & S1.u);
726 // SCC = 1 if result is non-zero.
727 void
728 Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
729 {
730 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
731 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
732 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
733 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
734
735 src0.read();
736 src1.read();
737
738 sdst = ~(src0.rawData() & src1.rawData());
739 scc = sdst.rawData() ? 1 : 0;
740
741 sdst.write();
742 scc.write();
743 }
744
745 Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
746 : Inst_SOP2(iFmt, "s_nand_b64")
747 {
748 setFlag(ALU);
749 } // Inst_SOP2__S_NAND_B64
750
751 Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
752 {
753 } // ~Inst_SOP2__S_NAND_B64
754
755 // D.u64 = ~(S0.u64 & S1.u64);
756 // SCC = 1 if result is non-zero.
757 void
758 Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
759 {
760 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
761 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
762 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
763 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
764
765 src0.read();
766 src1.read();
767
768 sdst = ~(src0.rawData() & src1.rawData());
769 scc = sdst.rawData() ? 1 : 0;
770
771 sdst.write();
772 scc.write();
773 }
774
775 Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
776 : Inst_SOP2(iFmt, "s_nor_b32")
777 {
778 setFlag(ALU);
779 } // Inst_SOP2__S_NOR_B32
780
781 Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
782 {
783 } // ~Inst_SOP2__S_NOR_B32
784
785 // D.u = ~(S0.u | S1.u);
786 // SCC = 1 if result is non-zero.
787 void
788 Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
789 {
790 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
791 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
792 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
793 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
794
795 src0.read();
796 src1.read();
797
798 sdst = ~(src0.rawData() | src1.rawData());
799 scc = sdst.rawData() ? 1 : 0;
800
801 sdst.write();
802 scc.write();
803 }
804
805 Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
806 : Inst_SOP2(iFmt, "s_nor_b64")
807 {
808 setFlag(ALU);
809 } // Inst_SOP2__S_NOR_B64
810
811 Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
812 {
813 } // ~Inst_SOP2__S_NOR_B64
814
815 // D.u64 = ~(S0.u64 | S1.u64);
816 // SCC = 1 if result is non-zero.
817 void
818 Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
819 {
820 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
821 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
822 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
823 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
824
825 src0.read();
826 src1.read();
827
828 sdst = ~(src0.rawData() | src1.rawData());
829 scc = sdst.rawData() ? 1 : 0;
830
831 sdst.write();
832 scc.write();
833 }
834
835 Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
836 : Inst_SOP2(iFmt, "s_xnor_b32")
837 {
838 setFlag(ALU);
839 } // Inst_SOP2__S_XNOR_B32
840
841 Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
842 {
843 } // ~Inst_SOP2__S_XNOR_B32
844
845 // D.u = ~(S0.u ^ S1.u);
846 // SCC = 1 if result is non-zero.
847 void
848 Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
849 {
850 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
851 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
852 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
853 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
854
855 src0.read();
856 src1.read();
857
858 sdst = ~(src0.rawData() ^ src1.rawData());
859 scc = sdst.rawData() ? 1 : 0;
860
861 sdst.write();
862 scc.write();
863 }
864
865 Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
866 : Inst_SOP2(iFmt, "s_xnor_b64")
867 {
868 setFlag(ALU);
869 } // Inst_SOP2__S_XNOR_B64
870
871 Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
872 {
873 } // ~Inst_SOP2__S_XNOR_B64
874
875 // D.u64 = ~(S0.u64 ^ S1.u64);
876 // SCC = 1 if result is non-zero.
877 void
878 Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
879 {
880 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
881 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
882 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
883 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
884
885 src0.read();
886 src1.read();
887
888 sdst = ~(src0.rawData() ^ src1.rawData());
889 scc = sdst.rawData() ? 1 : 0;
890
891 sdst.write();
892 scc.write();
893 }
894
895 Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
896 : Inst_SOP2(iFmt, "s_lshl_b32")
897 {
898 setFlag(ALU);
899 } // Inst_SOP2__S_LSHL_B32
900
901 Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
902 {
903 } // ~Inst_SOP2__S_LSHL_B32
904
905 // D.u = S0.u << S1.u[4:0];
906 // SCC = 1 if result is non-zero.
907 void
908 Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
909 {
910 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
911 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
912 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
913 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
914
915 src0.read();
916 src1.read();
917
918 sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
919 scc = sdst.rawData() ? 1 : 0;
920
921 sdst.write();
922 scc.write();
923 }
924
925 Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
926 : Inst_SOP2(iFmt, "s_lshl_b64")
927 {
928 setFlag(ALU);
929 } // Inst_SOP2__S_LSHL_B64
930
931 Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
932 {
933 } // ~Inst_SOP2__S_LSHL_B64
934
935 // D.u64 = S0.u64 << S1.u[5:0];
936 // SCC = 1 if result is non-zero.
937 void
938 Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
939 {
940 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
941 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
942 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
943 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
944
945 src0.read();
946 src1.read();
947
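// Only S1[5:0] selects the shift amount, e.g., S1 = 65 shifts by 1.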
948 sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
949 scc = sdst.rawData() ? 1 : 0;
950
951 sdst.write();
952 scc.write();
953 }
954
955 Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
956 : Inst_SOP2(iFmt, "s_lshr_b32")
957 {
958 setFlag(ALU);
959 } // Inst_SOP2__S_LSHR_B32
960
961 Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
962 {
963 } // ~Inst_SOP2__S_LSHR_B32
964
965 // D.u = S0.u >> S1.u[4:0];
966 // SCC = 1 if result is non-zero.
967 // The vacated bits are set to zero.
968 void
969 Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
970 {
971 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
972 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
973 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
974 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
975
976 src0.read();
977 src1.read();
978
979 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
980 scc = sdst.rawData() ? 1 : 0;
981
982 sdst.write();
983 scc.write();
984 }
985
986 Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
987 : Inst_SOP2(iFmt, "s_lshr_b64")
988 {
989 setFlag(ALU);
990 } // Inst_SOP2__S_LSHR_B64
991
992 Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
993 {
994 } // ~Inst_SOP2__S_LSHR_B64
995
996 // D.u64 = S0.u64 >> S1.u[5:0];
997 // SCC = 1 if result is non-zero.
998 // The vacated bits are set to zero.
999 void
1000 Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
1001 {
1002 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
1003 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1004 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1005 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1006
1007 src0.read();
1008 src1.read();
1009
1010 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
1011 scc = sdst.rawData() ? 1 : 0;
1012
1013 sdst.write();
1014 scc.write();
1015 }
1016
1017 Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
1018 : Inst_SOP2(iFmt, "s_ashr_i32")
1019 {
1020 setFlag(ALU);
1021 } // Inst_SOP2__S_ASHR_I32
1022
1023 Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
1024 {
1025 } // ~Inst_SOP2__S_ASHR_I32
1026
1027 // D.i = signext(S0.i) >> S1.u[4:0];
1028 // SCC = 1 if result is non-zero.
1029 // The vacated bits are set to the sign bit of the input value.
1030 void
1031 Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
1032 {
1033 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1034 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1035 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1036 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1037
1038 src0.read();
1039 src1.read();
1040
1041 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
1042 scc = sdst.rawData() ? 1 : 0;
1043
1044 sdst.write();
1045 scc.write();
1046 }
1047
1048 Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
1049 : Inst_SOP2(iFmt, "s_ashr_i64")
1050 {
1051 setFlag(ALU);
1052 } // Inst_SOP2__S_ASHR_I64
1053
1054 Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
1055 {
1056 } // ~Inst_SOP2__S_ASHR_I64
1057
1058 // D.i64 = signext(S0.i64) >> S1.u[5:0];
1059 // SCC = 1 if result is non-zero.
1060 // The vacated bits are set to the sign bit of the input value.
1061 void
1062 Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
1063 {
1064 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
1065 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1066 ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
1067 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1068
1069 src0.read();
1070 src1.read();
1071
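// Assumes the host compiler implements >> on signed integers as an
// arithmetic shift, so the vacated bits take the sign bit.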
1072 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
1073 scc = sdst.rawData() ? 1 : 0;
1074
1075 sdst.write();
1076 scc.write();
1077 }
1078
1079 Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
1080 : Inst_SOP2(iFmt, "s_bfm_b32")
1081 {
1082 setFlag(ALU);
1083 } // Inst_SOP2__S_BFM_B32
1084
1085 Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
1086 {
1087 } // ~Inst_SOP2__S_BFM_B32
1088
1089 // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
1090 void
1091 Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
1092 {
1093 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1094 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1095 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1096
1097 src0.read();
1098 src1.read();
1099
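// e.g., S0[4:0] = 3 and S1[4:0] = 4 give ((1 << 3) - 1) << 4 = 0x70.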
1100 sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
1101 << bits(src1.rawData(), 4, 0);
1102
1103 sdst.write();
1104 }
1105
1106 Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
1107 : Inst_SOP2(iFmt, "s_bfm_b64")
1108 {
1109 setFlag(ALU);
1110 } // Inst_SOP2__S_BFM_B64
1111
1112 Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
1113 {
1114 } // ~Inst_SOP2__S_BFM_B64
1115
1116 // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
1117 void
1118 Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
1119 {
1120 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1121 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1122 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1123
1124 src0.read();
1125 src1.read();
1126
1127 sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
1128 << bits(src1.rawData(), 5, 0);
1129
1130 sdst.write();
1131 }
1132
1133 Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
1134 : Inst_SOP2(iFmt, "s_mul_i32")
1135 {
1136 setFlag(ALU);
1137 } // Inst_SOP2__S_MUL_I32
1138
1139 Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
1140 {
1141 } // ~Inst_SOP2__S_MUL_I32
1142
1143 // D.i = S0.i * S1.i.
1144 void
1145 Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
1146 {
1147 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1148 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
1149 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1150
1151 src0.read();
1152 src1.read();
1153
1154 sdst = src0.rawData() * src1.rawData();
1155
1156 sdst.write();
1157 }
1158
1159 Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
1160 : Inst_SOP2(iFmt, "s_bfe_u32")
1161 {
1162 setFlag(ALU);
1163 } // Inst_SOP2__S_BFE_U32
1164
1165 Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
1166 {
1167 } // ~Inst_SOP2__S_BFE_U32
1168
1169 // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
1170 // field width.
1171 // D.u = (S0.u >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
1172 // SCC = 1 if result is non-zero.
1173 void
1174 Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
1175 {
1176 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1177 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1178 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1179 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1180
1181 src0.read();
1182 src1.read();
1183
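// e.g., S1 = 0x00040008 selects offset 8 and width 4, so D = S0[11:8].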
1184 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
1185 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1186 scc = sdst.rawData() ? 1 : 0;
1187
1188 sdst.write();
1189 scc.write();
1190 }
1191
1192 Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
1193 : Inst_SOP2(iFmt, "s_bfe_i32")
1194 {
1195 setFlag(ALU);
1196 } // Inst_SOP2__S_BFE_I32
1197
1198 Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
1199 {
1200 } // ~Inst_SOP2__S_BFE_I32
1201
1202 // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
1203 // field width.
1204 // D.i = (S0.i >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
1205 // Sign-extend the result;
1206 // SCC = 1 if result is non-zero.
1207 void
1208 Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
1209 {
1210 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1211 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1212 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1213 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1214
1215 src0.read();
1216 src1.read();
1217
1218 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
1219 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1220 scc = sdst.rawData() ? 1 : 0;
1221
1222 sdst.write();
1223 scc.write();
1224 }
1225
1226 Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
1227 : Inst_SOP2(iFmt, "s_bfe_u64")
1228 {
1229 setFlag(ALU);
1230 } // Inst_SOP2__S_BFE_U64
1231
1232 Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
1233 {
1234 } // ~Inst_SOP2__S_BFE_U64
1235
1236 // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
1237 // field width.
1238 // D.u64 = (S0.u64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
1239 // SCC = 1 if result is non-zero.
1240 void
1241 Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
1242 {
1243 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
1244 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1245 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1246 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1247
1248 src0.read();
1249 src1.read();
1250
1251 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
1252                 & ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
1253 scc = sdst.rawData() ? 1 : 0;
1254
1255 sdst.write();
1256 scc.write();
1257 }
1258
1259 Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
1260 : Inst_SOP2(iFmt, "s_bfe_i64")
1261 {
1262 setFlag(ALU);
1263 } // Inst_SOP2__S_BFE_I64
1264
1265 Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
1266 {
1267 } // ~Inst_SOP2__S_BFE_I64
1268
1269 // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
1270 // field width.
1271 // D.i64 = (S0.i64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
1272 // Sign-extend result;
1273 // SCC = 1 if result is non-zero.
1274 void
1275 Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
1276 {
1277 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
1278 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1279 ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
1280 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1281
1282 src0.read();
1283 src1.read();
1284
1285 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
1286                 & ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
1287 scc = sdst.rawData() ? 1 : 0;
1288
1289 sdst.write();
1290 scc.write();
1291 }
1292
1293 Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
1294 : Inst_SOP2(iFmt, "s_cbranch_g_fork")
1295 {
1296 setFlag(Branch);
1297 } // Inst_SOP2__S_CBRANCH_G_FORK
1298
1299 Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
1300 {
1301 } // ~Inst_SOP2__S_CBRANCH_G_FORK
1302
1303 // Conditional branch using branch-stack.
1304     // S0 = compare mask (vcc or any sgpr) and
1305 // S1 = 64-bit byte address of target instruction.
1306 void
1307 Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
1308 {
1309 panicUnimplemented();
1310 }
1311
1312 Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
1313 : Inst_SOP2(iFmt, "s_absdiff_i32")
1314 {
1315 setFlag(ALU);
1316 } // Inst_SOP2__S_ABSDIFF_I32
1317
1318 Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
1319 {
1320 } // ~Inst_SOP2__S_ABSDIFF_I32
1321
1322 // D.i = S0.i - S1.i;
1323 // if (D.i < 0) then D.i = -D.i;
1324 // SCC = 1 if result is non-zero.
1325 // Compute the absolute value of difference between two values.
1326 void
1327 Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
1328 {
1329 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1330 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
1331 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1332             ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1333
             src0.read();
             src1.read();
1334             sdst = std::abs(src0.rawData() - src1.rawData());
1335 scc = sdst.rawData() ? 1 : 0;
1336
1337 sdst.write();
1338 scc.write();
1339 }
1340
1341 Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
1342 InFmt_SOP2 *iFmt)
1343 : Inst_SOP2(iFmt, "s_rfe_restore_b64")
1344 {
1345 } // Inst_SOP2__S_RFE_RESTORE_B64
1346
1347 Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
1348 {
1349 } // ~Inst_SOP2__S_RFE_RESTORE_B64
1350
1351 // Return from exception handler and continue.
1352 void
1353 Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
1354 {
1355 panicUnimplemented();
1356 }
1357
1358 Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt)
1359 : Inst_SOPK(iFmt, "s_movk_i32")
1360 {
1361 setFlag(ALU);
1362 } // Inst_SOPK__S_MOVK_I32
1363
1364 Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32()
1365 {
1366 } // ~Inst_SOPK__S_MOVK_I32
1367
1368 // D.i = signext(SIMM16) (sign extension).
1369 void
1370 Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
1371 {
1372 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1373 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1374
1375 sdst = simm16;
1376
1377 sdst.write();
1378 }
1379
1380 Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt)
1381 : Inst_SOPK(iFmt, "s_cmovk_i32")
1382 {
1383 setFlag(ALU);
1384 } // Inst_SOPK__S_CMOVK_I32
1385
1386 Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32()
1387 {
1388 } // ~Inst_SOPK__S_CMOVK_I32
1389
1390 // if (SCC) then D.i = signext(SIMM16);
1391 // else NOP.
1392 // Conditional move with sign extension.
1393 void
1394 Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
1395 {
1396 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1397 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1398 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
1399
1400 scc.read();
1401
1402 if (scc.rawData()) {
1403 sdst = simm16;
1404 sdst.write();
1405 }
1406 }
1407
1408 Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt)
1409 : Inst_SOPK(iFmt, "s_cmpk_eq_i32")
1410 {
1411 setFlag(ALU);
1412 } // Inst_SOPK__S_CMPK_EQ_I32
1413
1414 Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32()
1415 {
1416 } // ~Inst_SOPK__S_CMPK_EQ_I32
1417
1418 // SCC = (S0.i == signext(SIMM16)).
1419 void
1420 Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
1421 {
1422 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1423 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1424 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1425
1426 src.read();
1427
1428 scc = (src.rawData() == simm16) ? 1 : 0;
1429
1430 scc.write();
1431 }
1432
1433 Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt)
1434 : Inst_SOPK(iFmt, "s_cmpk_lg_i32")
1435 {
1436 setFlag(ALU);
1437 } // Inst_SOPK__S_CMPK_LG_I32
1438
1439 Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32()
1440 {
1441 } // ~Inst_SOPK__S_CMPK_LG_I32
1442
1443 // SCC = (S0.i != signext(SIMM16)).
1444 void
1445 Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
1446 {
1447 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1448 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1449 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1450
1451 src.read();
1452
1453 scc = (src.rawData() != simm16) ? 1 : 0;
1454
1455 scc.write();
1456 }
1457
1458 Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt)
1459 : Inst_SOPK(iFmt, "s_cmpk_gt_i32")
1460 {
1461 setFlag(ALU);
1462 } // Inst_SOPK__S_CMPK_GT_I32
1463
1464 Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32()
1465 {
1466 } // ~Inst_SOPK__S_CMPK_GT_I32
1467
1468 // SCC = (S0.i > signext(SIMM16)).
1469 void
1470 Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
1471 {
1472 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1473 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1474 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1475
1476 src.read();
1477
1478 scc = (src.rawData() > simm16) ? 1 : 0;
1479
1480 scc.write();
1481 }
1482
1483 Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt)
1484 : Inst_SOPK(iFmt, "s_cmpk_ge_i32")
1485 {
1486 setFlag(ALU);
1487 } // Inst_SOPK__S_CMPK_GE_I32
1488
1489 Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32()
1490 {
1491 } // ~Inst_SOPK__S_CMPK_GE_I32
1492
1493 // SCC = (S0.i >= signext(SIMM16)).
1494 void
1495 Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
1496 {
1497 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1498 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1499 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1500
1501 src.read();
1502
1503 scc = (src.rawData() >= simm16) ? 1 : 0;
1504
1505 scc.write();
1506 }
1507
1508 Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt)
1509 : Inst_SOPK(iFmt, "s_cmpk_lt_i32")
1510 {
1511 setFlag(ALU);
1512 } // Inst_SOPK__S_CMPK_LT_I32
1513
1514 Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32()
1515 {
1516 } // ~Inst_SOPK__S_CMPK_LT_I32
1517
1518 // SCC = (S0.i < signext(SIMM16)).
1519 void
1520 Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
1521 {
1522 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1523 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1524 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1525
1526 src.read();
1527
1528 scc = (src.rawData() < simm16) ? 1 : 0;
1529
1530 scc.write();
1531 }
1532
1533 Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
1534 : Inst_SOPK(iFmt, "s_cmpk_le_i32")
1535 {
1536 setFlag(ALU);
1537 } // Inst_SOPK__S_CMPK_LE_I32
1538
1539 Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
1540 {
1541 } // ~Inst_SOPK__S_CMPK_LE_I32
1542
1543 // SCC = (S0.i <= signext(SIMM16)).
1544 void
1545 Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
1546 {
1547 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1548 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1549 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1550
1551 src.read();
1552
1553 scc = (src.rawData() <= simm16) ? 1 : 0;
1554
1555 scc.write();
1556 }
1557
1558 Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
1559 : Inst_SOPK(iFmt, "s_cmpk_eq_u32")
1560 {
1561 setFlag(ALU);
1562 } // Inst_SOPK__S_CMPK_EQ_U32
1563
1564 Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
1565 {
1566 } // ~Inst_SOPK__S_CMPK_EQ_U32
1567
1568 // SCC = (S0.u == SIMM16).
1569 void
1570 Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
1571 {
1572 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1573 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1574 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1575
1576 src.read();
1577
1578 scc = (src.rawData() == simm16) ? 1 : 0;
1579
1580 scc.write();
1581 }
1582
1583 Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
1584 : Inst_SOPK(iFmt, "s_cmpk_lg_u32")
1585 {
1586 setFlag(ALU);
1587 } // Inst_SOPK__S_CMPK_LG_U32
1588
1589 Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
1590 {
1591 } // ~Inst_SOPK__S_CMPK_LG_U32
1592
1593 // SCC = (S0.u != SIMM16).
1594 void
1595 Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
1596 {
1597 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1598 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1599 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1600
1601 src.read();
1602
1603 scc = (src.rawData() != simm16) ? 1 : 0;
1604
1605 scc.write();
1606 }
1607
1608 Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
1609 : Inst_SOPK(iFmt, "s_cmpk_gt_u32")
1610 {
1611 setFlag(ALU);
1612 } // Inst_SOPK__S_CMPK_GT_U32
1613
1614 Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
1615 {
1616 } // ~Inst_SOPK__S_CMPK_GT_U32
1617
1618 // SCC = (S0.u > SIMM16).
1619 void
1620 Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
1621 {
1622 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1623 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1624 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1625
1626 src.read();
1627
1628 scc = (src.rawData() > simm16) ? 1 : 0;
1629
1630 scc.write();
1631 }
1632
1633 Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
1634 : Inst_SOPK(iFmt, "s_cmpk_ge_u32")
1635 {
1636 setFlag(ALU);
1637 } // Inst_SOPK__S_CMPK_GE_U32
1638
1639 Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
1640 {
1641 } // ~Inst_SOPK__S_CMPK_GE_U32
1642
1643 // SCC = (S0.u >= SIMM16).
1644 void
1645 Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
1646 {
1647 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1648 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1649 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1650
1651 src.read();
1652
1653 scc = (src.rawData() >= simm16) ? 1 : 0;
1654
1655 scc.write();
1656 }
1657
1658 Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
1659 : Inst_SOPK(iFmt, "s_cmpk_lt_u32")
1660 {
1661 setFlag(ALU);
1662 } // Inst_SOPK__S_CMPK_LT_U32
1663
1664 Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
1665 {
1666 } // ~Inst_SOPK__S_CMPK_LT_U32
1667
1668 // SCC = (S0.u < SIMM16).
1669 void
1670 Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
1671 {
1672 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1673 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1674 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1675
1676 src.read();
1677
1678 scc = (src.rawData() < simm16) ? 1 : 0;
1679
1680 scc.write();
1681 }
1682
1683 Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
1684 : Inst_SOPK(iFmt, "s_cmpk_le_u32")
1685 {
1686 setFlag(ALU);
1687 } // Inst_SOPK__S_CMPK_LE_U32
1688
1689 Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
1690 {
1691 } // ~Inst_SOPK__S_CMPK_LE_U32
1692
1693 // SCC = (S0.u <= SIMM16).
1694 void
1695 Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
1696 {
1697 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1698 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1699 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1700
1701 src.read();
1702
1703 scc = (src.rawData() <= simm16) ? 1 : 0;
1704
1705 scc.write();
1706 }
1707
1708 Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
1709 : Inst_SOPK(iFmt, "s_addk_i32")
1710 {
1711 setFlag(ALU);
1712 } // Inst_SOPK__S_ADDK_I32
1713
1714 Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
1715 {
1716 } // ~Inst_SOPK__S_ADDK_I32
1717
1718 // D.i = D.i + signext(SIMM16);
1719 // SCC = overflow.
1720 void
1721 Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
1722 {
1723 ScalarRegI16 simm16 = instData.SIMM16;
1724 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1725 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1726 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1727
1728 src.read();
1729
1730 sdst = src.rawData() + (ScalarRegI32)simm16;
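// Signed overflow: the addends agree in sign but the sum differs, e.g.,
// 0x7fffffff + 1 sets SCC = 1.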
1731 scc = (bits(src.rawData(), 31) == bits(simm16, 15)
1732 && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
1733
1734 sdst.write();
1735 scc.write();
1736 }
1737
1738 Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
1739 : Inst_SOPK(iFmt, "s_mulk_i32")
1740 {
1741 setFlag(ALU);
1742 } // Inst_SOPK__S_MULK_I32
1743
1744 Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
1745 {
1746 } // ~Inst_SOPK__S_MULK_I32
1747
1748 // D.i = D.i * signext(SIMM16).
1749 void
1750 Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
1751 {
1752 ScalarRegI16 simm16 = instData.SIMM16;
1753 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1754
1755 sdst.read();
1756
1757 sdst = sdst.rawData() * (ScalarRegI32)simm16;
1758
1759 sdst.write();
1760 }
1761
1762 Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
1763 : Inst_SOPK(iFmt, "s_cbranch_i_fork")
1764 {
1765 setFlag(Branch);
1766 } // Inst_SOPK__S_CBRANCH_I_FORK
1767
1768 Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
1769 {
1770 } // ~Inst_SOPK__S_CBRANCH_I_FORK
1771
1772 // Conditional branch using branch-stack.
1773     // S0 = compare mask (vcc or any sgpr), and
1774 // SIMM16 = signed DWORD branch offset relative to next instruction.
1775 void
1776 Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
1777 {
1778 panicUnimplemented();
1779 }
1780
1781 Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
1782 : Inst_SOPK(iFmt, "s_getreg_b32")
1783 {
1784 } // Inst_SOPK__S_GETREG_B32
1785
1786 Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
1787 {
1788 } // ~Inst_SOPK__S_GETREG_B32
1789
1790 // D.u = hardware-reg. Read some or all of a hardware register into the
1791 // LSBs of D.
1792 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
1793 // is 1..32.
1794 void
1795 Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
1796 {
1797 panicUnimplemented();
1798 }
1799
1800 Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
1801 : Inst_SOPK(iFmt, "s_setreg_b32")
1802 {
1803 setFlag(ALU);
1804 } // Inst_SOPK__S_SETREG_B32
1805
1806 Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
1807 {
1808 } // ~Inst_SOPK__S_SETREG_B32
1809
1810 // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
1811 // register.
1812 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
1813 // is 1..32.
1814 void
1815 Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
1816 {
1817 ScalarRegI16 simm16 = instData.SIMM16;
1818 ScalarRegU32 hwregId = simm16 & 0x3f;
1819 ScalarRegU32 offset = (simm16 >> 6) & 31;
1820 ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
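// e.g., SIMM16 = 0x0801 decodes to hwregId = 1 (MODE), offset = 0,
// size = 2, i.e., the single-precision FP round-mode bits.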
1821
1822 ScalarOperandU32 hwreg(gpuDynInst, hwregId);
1823 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1824 hwreg.read();
1825 sdst.read();
1826
1827 // Store value from SDST to part of the hardware register.
1828 ScalarRegU32 mask = (((1U << size) - 1U) << offset);
1829 hwreg = ((hwreg.rawData() & ~mask)
1830 | ((sdst.rawData() << offset) & mask));
1831 hwreg.write();
1832
1833         // Writes to the MODE register select single-precision
1834         // floating-point behavior (denormal mode or round mode),
         // which gem5 does not model.
1835         if (hwregId == 1 && size == 2
1836             && (offset == 4 || offset == 0)) {
1837             warn_once("s_setreg_b32 has no real effect on FP modes "
1838                 "in gem5: %s\n", gpuDynInst->disassemble());
1839 return;
1840 }
1841
1842 // panic if not changing MODE of floating-point numbers
1843 panicUnimplemented();
1844 }
1845
1846 Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
1847 InFmt_SOPK *iFmt)
1848 : Inst_SOPK(iFmt, "s_setreg_imm32_b32")
1849 {
1850 } // Inst_SOPK__S_SETREG_IMM32_B32
1851
1852 Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
1853 {
1854 } // ~Inst_SOPK__S_SETREG_IMM32_B32
1855
1856 // Write some or all of the LSBs of IMM32 into a hardware register; this
1857 // instruction requires a 32-bit literal constant.
1858 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
1859 // is 1..32.
1860 void
1861 Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
1862 {
1863 panicUnimplemented();
1864 }
1865
1866 Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
1867 : Inst_SOP1(iFmt, "s_mov_b32")
1868 {
1869 setFlag(ALU);
1870 } // Inst_SOP1__S_MOV_B32
1871
1872 Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
1873 {
1874 } // ~Inst_SOP1__S_MOV_B32
1875
1876 // D.u = S0.u.
1877 void
1878 Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
1879 {
1880 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
1881 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1882
1883 src.read();
1884
1885 sdst = src.rawData();
1886
1887 sdst.write();
1888 }
1889
1890 Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
1891 : Inst_SOP1(iFmt, "s_mov_b64")
1892 {
1893 setFlag(ALU);
1894 } // Inst_SOP1__S_MOV_B64
1895
1896 Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
1897 {
1898 } // ~Inst_SOP1__S_MOV_B64
1899
1900 // D.u64 = S0.u64.
1901 void
1902 Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
1903 {
1904 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
1905 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1906
1907 src.read();
1908
1909 sdst = src.rawData();
1910
1911 sdst.write();
1912 }
1913
1914 Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
1915 : Inst_SOP1(iFmt, "s_cmov_b32")
1916 {
1917 setFlag(ALU);
1918 } // Inst_SOP1__S_CMOV_B32
1919
1920 Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
1921 {
1922 } // ~Inst_SOP1__S_CMOV_B32
1923
1924 // if (SCC) then D.u = S0.u;
1925 // else NOP.
1926 // Conditional move.
1927 void
1928 Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
1929 {
1930 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
1931 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1932 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1933
1934 src.read();
1935 scc.read();
1936
1937 if (scc.rawData()) {
1938 sdst = src.rawData();
1939 sdst.write();
1940 }
1941 }
1942
1943 Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
1944 : Inst_SOP1(iFmt, "s_cmov_b64")
1945 {
1946 setFlag(ALU);
1947 } // Inst_SOP1__S_CMOV_B64
1948
1949 Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
1950 {
1951 } // ~Inst_SOP1__S_CMOV_B64
1952
1953 // if (SCC) then D.u64 = S0.u64;
1954 // else NOP.
1955 // Conditional move.
1956 void
1957 Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
1958 {
1959 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
1960 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1961 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1962
1963 src.read();
1964 scc.read();
1965
1966 if (scc.rawData()) {
1967 sdst = src.rawData();
1968 sdst.write();
1969 }
1970 }
1971
1972 Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
1973 : Inst_SOP1(iFmt, "s_not_b32")
1974 {
1975 setFlag(ALU);
1976 } // Inst_SOP1__S_NOT_B32
1977
1978 Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
1979 {
1980 } // ~Inst_SOP1__S_NOT_B32
1981
1982 // D.u = ~S0.u;
1983 // SCC = 1 if result is non-zero.
1984 // Bitwise negation.
1985 void
1986 Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
1987 {
1988 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
1989 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1990 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1991
1992 src.read();
1993
1994 sdst = ~src.rawData();
1995
1996 scc = sdst.rawData() ? 1 : 0;
1997
1998 sdst.write();
1999 scc.write();
2000 }
2001
2002 Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
2003 : Inst_SOP1(iFmt, "s_not_b64")
2004 {
2005 setFlag(ALU);
2006 } // Inst_SOP1__S_NOT_B64
2007
2008 Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
2009 {
2010 } // ~Inst_SOP1__S_NOT_B64
2011
2012 // D.u64 = ~S0.u64;
2013 // SCC = 1 if result is non-zero.
2014 // Bitwise negation.
2015 void
2016 Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
2017 {
2018 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2019 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2020 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2021
2022 src.read();
2023
2024 sdst = ~src.rawData();
2025 scc = sdst.rawData() ? 1 : 0;
2026
2027 sdst.write();
2028 scc.write();
2029 }
2030
2031 Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
2032 : Inst_SOP1(iFmt, "s_wqm_b32")
2033 {
2034 setFlag(ALU);
2035 } // Inst_SOP1__S_WQM_B32
2036
2037 Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
2038 {
2039 } // ~Inst_SOP1__S_WQM_B32
2040
2041 // Computes whole quad mode for an active/valid mask.
2042 // SCC = 1 if result is non-zero.
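// Example (assuming wholeQuadMode() ORs each 4-bit group of the mask
// and broadcasts the result to all four bits of that group):
// wholeQuadMode(0x00210001) == 0x00ff000f.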
2043 void
2044 Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
2045 {
2046 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2047 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2048 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2049
2050 src.read();
2051
2052 sdst = wholeQuadMode(src.rawData());
2053 scc = sdst.rawData() ? 1 : 0;
2054
2055 sdst.write();
2056 scc.write();
2057 }
2058
2059 Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
2060 : Inst_SOP1(iFmt, "s_wqm_b64")
2061 {
2062 setFlag(ALU);
2063 } // Inst_SOP1__S_WQM_B64
2064
2065 Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
2066 {
2067 } // ~Inst_SOP1__S_WQM_B64
2068
2069 // Computes whole quad mode for an active/valid mask.
2070 // SCC = 1 if result is non-zero.
2071 void
2072 Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
2073 {
2074 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2075 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2076 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2077
2078 src.read();
2079
2080 sdst = wholeQuadMode(src.rawData());
2081 scc = sdst.rawData() ? 1 : 0;
2082
2083 sdst.write();
2084 scc.write();
2085 }
2086
2087 Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
2088 : Inst_SOP1(iFmt, "s_brev_b32")
2089 {
2090 setFlag(ALU);
2091 } // Inst_SOP1__S_BREV_B32
2092
2093 Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
2094 {
2095 } // ~Inst_SOP1__S_BREV_B32
2096
2097 // D.u[31:0] = S0.u[0:31] (reverse bits).
2098 void
2099 Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
2100 {
2101 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2102 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2103
2104 src.read();
2105
2106 sdst = reverseBits(src.rawData());
2107
2108 sdst.write();
2109 }
2110
2111 Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
2112 : Inst_SOP1(iFmt, "s_brev_b64")
2113 {
2114 setFlag(ALU);
2115 } // Inst_SOP1__S_BREV_B64
2116
2117 Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
2118 {
2119 } // ~Inst_SOP1__S_BREV_B64
2120
2121 // D.u64[63:0] = S0.u64[0:63] (reverse bits).
2122 void
2123 Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
2124 {
2125 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2126 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2127
2128 src.read();
2129
2130 sdst = reverseBits(src.rawData());
2131
2132 sdst.write();
2133 }
2134
2135 Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt)
2136 : Inst_SOP1(iFmt, "s_bcnt0_i32_b32")
2137 {
2138 setFlag(ALU);
2139 } // Inst_SOP1__S_BCNT0_I32_B32
2140
2141 Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
2142 {
2143 } // ~Inst_SOP1__S_BCNT0_I32_B32
2144
2145 // D.i = CountZeroBits(S0.u);
2146 // SCC = 1 if result is non-zero.
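// Example (assuming countZeroBits() matches the spec above):
// countZeroBits(0x00000000) == 32 (SCC = 1);
// countZeroBits(0xffffffff) == 0 (SCC = 0).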
2147 void
2148 Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2149 {
2150 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2151 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2152 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2153
2154 src.read();
2155
2156 sdst = countZeroBits(src.rawData());
2157 scc = sdst.rawData() ? 1 : 0;
2158
2159 sdst.write();
2160 scc.write();
2161 }
2162
2163 Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt)
2164 : Inst_SOP1(iFmt, "s_bcnt0_i32_b64")
2165 {
2166 setFlag(ALU);
2167 } // Inst_SOP1__S_BCNT0_I32_B64
2168
2169 Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
2170 {
2171 } // ~Inst_SOP1__S_BCNT0_I32_B64
2172
2173 // D.i = CountZeroBits(S0.u64);
2174 // SCC = 1 if result is non-zero.
2175 void
2176 Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2177 {
2178 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2179 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2180 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2181
2182 src.read();
2183
2184 sdst = countZeroBits(src.rawData());
2185 scc = sdst.rawData() ? 1 : 0;
2186
2187 sdst.write();
2188 scc.write();
2189 }
2190
2191 Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt)
2192 : Inst_SOP1(iFmt, "s_bcnt1_i32_b32")
2193 {
2194 setFlag(ALU);
2195 } // Inst_SOP1__S_BCNT1_I32_B32
2196
2197 Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
2198 {
2199 } // ~Inst_SOP1__S_BCNT1_I32_B32
2200
2201 // D.i = CountOneBits(S0.u);
2202 // SCC = 1 if result is non-zero.
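// Example (assuming popCount() matches the spec above):
// popCount(0xcccccccc) == 16; popCount(0x00000000) == 0 (SCC = 0).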
2203 void
2204 Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2205 {
2206 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2207 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2208 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2209
2210 src.read();
2211
2212 sdst = popCount(src.rawData());
2213 scc = sdst.rawData() ? 1 : 0;
2214
2215 sdst.write();
2216 scc.write();
2217 }
2218
2219 Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt)
2220 : Inst_SOP1(iFmt, "s_bcnt1_i32_b64")
2221 {
2222 setFlag(ALU);
2223 } // Inst_SOP1__S_BCNT1_I32_B64
2224
2225 Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
2226 {
2227 } // ~Inst_SOP1__S_BCNT1_I32_B64
2228
2229 // D.i = CountOneBits(S0.u64);
2230 // SCC = 1 if result is non-zero.
2231 void
2232 Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2233 {
2234 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2235 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2236 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2237
2238 src.read();
2239
2240 sdst = popCount(src.rawData());
2241 scc = sdst.rawData() ? 1 : 0;
2242
2243 sdst.write();
2244 scc.write();
2245 }
2246
2247 Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt)
2248 : Inst_SOP1(iFmt, "s_ff0_i32_b32")
2249 {
2250 setFlag(ALU);
2251 } // Inst_SOP1__S_FF0_I32_B32
2252
2253 Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
2254 {
2255 } // ~Inst_SOP1__S_FF0_I32_B32
2256
2257 // D.i = FindFirstZero(S0.u);
2258 // If no zeros are found, return -1.
2259 // Returns the bit position of the first zero from the LSB.
2260 void
2261 Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2262 {
2263 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2264 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2265
2266 src.read();
2267
2268 sdst = findFirstZero(src.rawData());
2269
2270 sdst.write();
2271 }
2272
2273 Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt)
2274 : Inst_SOP1(iFmt, "s_ff0_i32_b64")
2275 {
2276 setFlag(ALU);
2277 } // Inst_SOP1__S_FF0_I32_B64
2278
2279 Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
2280 {
2281 } // ~Inst_SOP1__S_FF0_I32_B64
2282
2283 // D.i = FindFirstZero(S0.u64);
2284 // If no zeros are found, return -1.
2285 // Returns the bit position of the first zero from the LSB.
2286 void
2287 Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2288 {
2289 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2290 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2291
2292 src.read();
2293
2294 sdst = findFirstZero(src.rawData());
2295
2296 sdst.write();
2297 }
2298
2299 Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt)
2300 : Inst_SOP1(iFmt, "s_ff1_i32_b32")
2301 {
2302 setFlag(ALU);
2303 } // Inst_SOP1__S_FF1_I32_B32
2304
2305 Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
2306 {
2307 } // ~Inst_SOP1__S_FF1_I32_B32
2308
2309 // D.i = FindFirstOne(S0.u);
2310 // If no ones are found, return -1.
2311 // Returns the bit position of the first one from the LSB.
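// Example (assuming findFirstOne() matches the spec above):
// findFirstOne(0x00010000) == 16; findFirstOne(0x00000000) == -1.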
2312 void
2313 Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2314 {
2315 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2316 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2317
2318 src.read();
2319
2320 sdst = findFirstOne(src.rawData());
2321
2322 sdst.write();
2323 }
2324
2325 Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt)
2326 : Inst_SOP1(iFmt, "s_ff1_i32_b64")
2327 {
2328 setFlag(ALU);
2329 } // Inst_SOP1__S_FF1_I32_B64
2330
2331 Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
2332 {
2333 } // ~Inst_SOP1__S_FF1_I32_B64
2334
2335 // D.i = FindFirstOne(S0.u64);
2336 // If no ones are found, return -1.
2337 // Returns the bit position of the first one from the LSB.
2338 void
2339 Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2340 {
2341 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2342 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2343
2344 src.read();
2345
2346 sdst = findFirstOne(src.rawData());
2347
2348 sdst.write();
2349 }
2350
2351 Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
2352 : Inst_SOP1(iFmt, "s_flbit_i32_b32")
2353 {
2354 setFlag(ALU);
2355 } // Inst_SOP1__S_FLBIT_I32_B32
2356
2357 Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
2358 {
2359 } // ~Inst_SOP1__S_FLBIT_I32_B32
2360
2361 // D.i = FindFirstOne(S0.u);
2362 // If no ones are found, return -1.
2363 // Counts how many zeros before the first one starting from the MSB.
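// Example (assuming countZeroBitsMsb() matches the spec above):
// countZeroBitsMsb(0x0000ffff) == 16; countZeroBitsMsb(0) == -1.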
2364 void
2365 Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2366 {
2367 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2368 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2369
2370 src.read();
2371
2372 sdst = countZeroBitsMsb(src.rawData());
2373
2374 sdst.write();
2375 }
2376
2377 Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
2378 : Inst_SOP1(iFmt, "s_flbit_i32_b64")
2379 {
2380 setFlag(ALU);
2381 } // Inst_SOP1__S_FLBIT_I32_B64
2382
2383 Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
2384 {
2385 } // ~Inst_SOP1__S_FLBIT_I32_B64
2386
2387 // D.i = FindFirstOne(S0.u64);
2388 // If no ones are found, return -1.
2389 // Counts how many zeros before the first one starting from the MSB.
2390 void
2391 Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2392 {
2393 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2394 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2395
2396 src.read();
2397
2398 sdst = countZeroBitsMsb(src.rawData());
2399
2400 sdst.write();
2401 }
2402
2403 Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
2404 : Inst_SOP1(iFmt, "s_flbit_i32")
2405 {
2406 setFlag(ALU);
2407 } // Inst_SOP1__S_FLBIT_I32
2408
2409 Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
2410 {
2411 } // ~Inst_SOP1__S_FLBIT_I32
2412
2413 // D.i = FirstOppositeSignBit(S0.i);
2414 // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
2415 // Counts how many bits in a row (from MSB to LSB) are the same as the
2416 // sign bit.
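// Example (assuming firstOppositeSignBit() matches the spec above):
// 0x0000ffff has 16 leading copies of its (zero) sign bit, so the
// result is 16; 0xffff0000 (16 leading ones) also yields 16.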
2417 void
2418 Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
2419 {
2420 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
2421 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2422
2423 src.read();
2424
2425 sdst = firstOppositeSignBit(src.rawData());
2426
2427 sdst.write();
2428 }
2429
2430 Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt)
2431 : Inst_SOP1(iFmt, "s_flbit_i32_i64")
2432 {
2433 setFlag(ALU);
2434 } // Inst_SOP1__S_FLBIT_I32_I64
2435
2436 Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
2437 {
2438 } // ~Inst_SOP1__S_FLBIT_I32_I64
2439
2440 // D.i = FirstOppositeSignBit(S0.i64);
2441 // If S0.i64 == 0 or S0.i64 == -1 (all bits are the same), return -1.
2442 // Counts how many bits in a row (from MSB to LSB) are the same as the
2443 // sign bit.
2444 void
2445 Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst)
2446 {
2447 ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0);
2448 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2449
2450 src.read();
2451
2452 sdst = firstOppositeSignBit(src.rawData());
2453
2454 sdst.write();
2455 }
2456
2457 Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
2458 : Inst_SOP1(iFmt, "s_sext_i32_i8")
2459 {
2460 setFlag(ALU);
2461 } // Inst_SOP1__S_SEXT_I32_I8
2462
2463 Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
2464 {
2465 } // ~Inst_SOP1__S_SEXT_I32_I8
2466
2467 // D.i = signext(S0.i[7:0]) (sign extension).
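// For example, signext(0xff) == 0xffffffff (-1) and
// signext(0x7f) == 0x0000007f (+127).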
2468 void
2469 Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
2470 {
2471 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
2472 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2473
2474 src.read();
2475
2476 sdst = sext<std::numeric_limits<ScalarRegI8>::digits + 1>(
2477 bits(src.rawData(), 7, 0));
2478
2479 sdst.write();
2480 }
2481
2482 Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
2483 : Inst_SOP1(iFmt, "s_sext_i32_i16")
2484 {
2485 setFlag(ALU);
2486 } // Inst_SOP1__S_SEXT_I32_I16
2487
2488 Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
2489 {
2490 } // ~Inst_SOP1__S_SEXT_I32_I16
2491
2492 // D.i = signext(S0.i[15:0]) (sign extension).
2493 void
2494 Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
2495 {
2496 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
2497 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2498
2499 src.read();
2500
2501 sdst = sext<std::numeric_limits<ScalarRegI16>::digits + 1>(
2502 bits(src.rawData(), 15, 0));
2503
2504 sdst.write();
2505 }
2506
2507 Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
2508 : Inst_SOP1(iFmt, "s_bitset0_b32")
2509 {
2510 setFlag(ALU);
2511 } // Inst_SOP1__S_BITSET0_B32
2512
2513 Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
2514 {
2515 } // ~Inst_SOP1__S_BITSET0_B32
2516
2517 // D.u[S0.u[4:0]] = 0.
2518 void
2519 Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
2520 {
2521 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2522 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2523
2524 src.read();
2525
2526 sdst.setBit(bits(src.rawData(), 4, 0), 0);
2527
2528 sdst.write();
2529 }
2530
2531 Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
2532 : Inst_SOP1(iFmt, "s_bitset0_b64")
2533 {
2534 setFlag(ALU);
2535 } // Inst_SOP1__S_BITSET0_B64
2536
2537 Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
2538 {
2539 } // ~Inst_SOP1__S_BITSET0_B64
2540
2541 // D.u64[S0.u[5:0]] = 0.
2542 void
2543 Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
2544 {
2545 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2546 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2547
2548 src.read();
2549
2550 sdst.setBit(bits(src.rawData(), 5, 0), 0);
2551
2552 sdst.write();
2553 }
2554
2555 Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
2556 : Inst_SOP1(iFmt, "s_bitset1_b32")
2557 {
2558 setFlag(ALU);
2559 } // Inst_SOP1__S_BITSET1_B32
2560
2561 Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
2562 {
2563 } // ~Inst_SOP1__S_BITSET1_B32
2564
2565 // D.u[S0.u[4:0]] = 1.
2566 void
2567 Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
2568 {
2569 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2570 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2571
2572 src.read();
2573
2574 sdst.setBit(bits(src.rawData(), 4, 0), 1);
2575
2576 sdst.write();
2577 }
2578
2579 Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
2580 : Inst_SOP1(iFmt, "s_bitset1_b64")
2581 {
2582 setFlag(ALU);
2583 } // Inst_SOP1__S_BITSET1_B64
2584
2585 Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
2586 {
2587 } // ~Inst_SOP1__S_BITSET1_B64
2588
2589 // D.u64[S0.u[5:0]] = 1.
2590 void
2591 Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
2592 {
2593 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2594 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2595
2596 src.read();
2597
2598 sdst.setBit(bits(src.rawData(), 5, 0), 1);
2599
2600 sdst.write();
2601 }
2602
2603 Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
2604 : Inst_SOP1(iFmt, "s_getpc_b64")
2605 {
2606 setFlag(ALU);
2607 } // Inst_SOP1__S_GETPC_B64
2608
2609 Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
2610 {
2611 } // ~Inst_SOP1__S_GETPC_B64
2612
2613 // D.u64 = PC + 4.
2614 // Destination receives the byte address of the next instruction.
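// The SOP1 encoding occupies one dword, so PC + 4 is the address of
// the instruction that follows this one in program order.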
2615 void
2616 Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
2617 {
2618 Wavefront *wf = gpuDynInst->wavefront();
2619 Addr pc = wf->pc();
2620 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2621
2622 sdst = pc + 4;
2623
2624 sdst.write();
2625 }
2626
2627 Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
2628 : Inst_SOP1(iFmt, "s_setpc_b64")
2629 {
2630 setFlag(ALU);
2631 } // Inst_SOP1__S_SETPC_B64
2632
2633 Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
2634 {
2635 } // ~Inst_SOP1__S_SETPC_B64
2636
2637 // PC = S0.u64.
2638 // S0.u64 is a byte address of the instruction to jump to.
2639 void
2640 Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
2641 {
2642 Wavefront *wf = gpuDynInst->wavefront();
2643 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2644
2645 src.read();
2646
2647 wf->pc(src.rawData());
2648 }
2649
2650 Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
2651 : Inst_SOP1(iFmt, "s_swappc_b64")
2652 {
2653 setFlag(ALU);
2654 } // Inst_SOP1__S_SWAPPC_B64
2655
2656 Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
2657 {
2658 } // ~Inst_SOP1__S_SWAPPC_B64
2659
2660 // D.u64 = PC + 4; PC = S0.u64.
2661 // S0.u64 is a byte address of the instruction to jump to.
2662 void
2663 Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
2664 {
2665 Wavefront *wf = gpuDynInst->wavefront();
2666 Addr pc = wf->pc();
2667 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2668 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2669
2670 src.read();
2671
2672 sdst = pc + 4;
2673
2674 wf->pc(src.rawData());
2675 sdst.write();
2676 }
2677
2678 Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
2679 : Inst_SOP1(iFmt, "s_rfe_b64")
2680 {
2681 } // Inst_SOP1__S_RFE_B64
2682
2683 Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
2684 {
2685 } // ~Inst_SOP1__S_RFE_B64
2686
2687 // Return from exception handler and continue.
2688 void
2689 Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
2690 {
2691 panicUnimplemented();
2692 }
2693
2694 Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
2695 InFmt_SOP1 *iFmt)
2696 : Inst_SOP1(iFmt, "s_and_saveexec_b64")
2697 {
2698 setFlag(ALU);
2699 } // Inst_SOP1__S_AND_SAVEEXEC_B64
2700
2701 Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
2702 {
2703 } // ~Inst_SOP1__S_AND_SAVEEXEC_B64
2704
2705 // D.u64 = EXEC;
2706 // EXEC = S0.u64 & EXEC;
2707 // SCC = 1 if the new value of EXEC is non-zero.
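// Commonly used to open a divergent if-block: the prior EXEC mask is
// saved in D while EXEC is narrowed to the lanes taking the branch.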
2708 void
2709 Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2710 {
2711 Wavefront *wf = gpuDynInst->wavefront();
2712 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2713 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2714 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2715
2716 src.read();
2717
2718 sdst = wf->execMask().to_ullong();
2719 wf->execMask() = src.rawData() & wf->execMask().to_ullong();
2720 scc = wf->execMask().any() ? 1 : 0;
2721
2722 sdst.write();
2723 scc.write();
2724 }
2725
2726 Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
2727 InFmt_SOP1 *iFmt)
2728 : Inst_SOP1(iFmt, "s_or_saveexec_b64")
2729 {
2730 setFlag(ALU);
2731 } // Inst_SOP1__S_OR_SAVEEXEC_B64
2732
2733 Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
2734 {
2735 } // ~Inst_SOP1__S_OR_SAVEEXEC_B64
2736
2737 // D.u64 = EXEC;
2738 // EXEC = S0.u64 | EXEC;
2739 // SCC = 1 if the new value of EXEC is non-zero.
2740 void
2741 Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2742 {
2743 Wavefront *wf = gpuDynInst->wavefront();
2744 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2745 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2746 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2747
2748 src.read();
2749
2750 sdst = wf->execMask().to_ullong();
2751 wf->execMask() = src.rawData() | wf->execMask().to_ullong();
2752 scc = wf->execMask().any() ? 1 : 0;
2753
2754 sdst.write();
2755 scc.write();
2756 }
2757
2758 Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
2759 InFmt_SOP1 *iFmt)
2760 : Inst_SOP1(iFmt, "s_xor_saveexec_b64")
2761 {
2762 setFlag(ALU);
2763 } // Inst_SOP1__S_XOR_SAVEEXEC_B64
2764
2765 Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
2766 {
2767 } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64
2768
2769 // D.u64 = EXEC;
2770 // EXEC = S0.u64 ^ EXEC;
2771 // SCC = 1 if the new value of EXEC is non-zero.
2772 void
2773 Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2774 {
2775 Wavefront *wf = gpuDynInst->wavefront();
2776 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2777 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2778 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2779
2780 src.read();
2781
2782 sdst = wf->execMask().to_ullong();
2783 wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
2784 scc = wf->execMask().any() ? 1 : 0;
2785
2786 sdst.write();
2787 scc.write();
2788 }
2789
2790 Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
2791 InFmt_SOP1 *iFmt)
2792 : Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
2793 {
2794 setFlag(ALU);
2795 } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64
2796
2797 Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
2798 {
2799 } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64
2800
2801 // D.u64 = EXEC;
2802 // EXEC = S0.u64 & ~EXEC;
2803 // SCC = 1 if the new value of EXEC is non-zero.
2804 void
2805 Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2806 {
2807 Wavefront *wf = gpuDynInst->wavefront();
2808 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2809 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2810 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2811
2812 src.read();
2813
2814 sdst = wf->execMask().to_ullong();
2815 wf->execMask() = src.rawData() & ~wf->execMask().to_ullong();
2816 scc = wf->execMask().any() ? 1 : 0;
2817
2818 sdst.write();
2819 scc.write();
2820 }
2821
2822 Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
2823 InFmt_SOP1 *iFmt)
2824 : Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
2825 {
2826 setFlag(ALU);
2827 } // Inst_SOP1__S_ORN2_SAVEEXEC_B64
2828
2829 Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
2830 {
2831 } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64
2832
2833 // D.u64 = EXEC;
2834 // EXEC = S0.u64 | ~EXEC;
2835 // SCC = 1 if the new value of EXEC is non-zero.
2836 void
2837 Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2838 {
2839 Wavefront *wf = gpuDynInst->wavefront();
2840 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2841 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2842 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2843
2844 src.read();
2845
2846 sdst = wf->execMask().to_ullong();
2847 wf->execMask() = src.rawData() | ~wf->execMask().to_ullong();
2848 scc = wf->execMask().any() ? 1 : 0;
2849
2850 sdst.write();
2851 scc.write();
2852 }
2853
2854 Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
2855 InFmt_SOP1 *iFmt)
2856 : Inst_SOP1(iFmt, "s_nand_saveexec_b64")
2857 {
2858 setFlag(ALU);
2859 } // Inst_SOP1__S_NAND_SAVEEXEC_B64
2860
2861 Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
2862 {
2863 } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64
2864
2865 // D.u64 = EXEC;
2866 // EXEC = ~(S0.u64 & EXEC);
2867 // SCC = 1 if the new value of EXEC is non-zero.
2868 void
2869 Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2870 {
2871 Wavefront *wf = gpuDynInst->wavefront();
2872 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2873 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2874 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2875
2876 src.read();
2877
2878 sdst = wf->execMask().to_ullong();
2879 wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
2880 scc = wf->execMask().any() ? 1 : 0;
2881
2882 sdst.write();
2883 scc.write();
2884 }
2885
2886 Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
2887 InFmt_SOP1 *iFmt)
2888 : Inst_SOP1(iFmt, "s_nor_saveexec_b64")
2889 {
2890 setFlag(ALU);
2891 } // Inst_SOP1__S_NOR_SAVEEXEC_B64
2892
2893 Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
2894 {
2895 } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64
2896
2897 // D.u64 = EXEC;
2898 // EXEC = ~(S0.u64 | EXEC);
2899 // SCC = 1 if the new value of EXEC is non-zero.
2900 void
2901 Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2902 {
2903 Wavefront *wf = gpuDynInst->wavefront();
2904 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2905 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2906 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2907
2908 src.read();
2909
2910 sdst = wf->execMask().to_ullong();
2911 wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
2912 scc = wf->execMask().any() ? 1 : 0;
2913
2914 sdst.write();
2915 scc.write();
2916 }
2917
2918 Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
2919 InFmt_SOP1 *iFmt)
2920 : Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
2921 {
2922 setFlag(ALU);
2923 } // Inst_SOP1__S_XNOR_SAVEEXEC_B64
2924
2925 Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
2926 {
2927 } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64
2928
2929 // D.u64 = EXEC;
2930 // EXEC = ~(S0.u64 ^ EXEC);
2931 // SCC = 1 if the new value of EXEC is non-zero.
2932 void
2933 Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2934 {
2935 Wavefront *wf = gpuDynInst->wavefront();
2936 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2937 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2938 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2939
2940 src.read();
2941
2942 sdst = wf->execMask().to_ullong();
2943 wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
2944 scc = wf->execMask().any() ? 1 : 0;
2945
2946 sdst.write();
2947 scc.write();
2948 }
2949
2950 Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
2951 : Inst_SOP1(iFmt, "s_quadmask_b32")
2952 {
2953 setFlag(ALU);
2954 } // Inst_SOP1__S_QUADMASK_B32
2955
2956 Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
2957 {
2958 } // ~Inst_SOP1__S_QUADMASK_B32
2959
2960 // D.u = QuadMask(S0.u):
2961 // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
2962 // SCC = 1 if result is non-zero.
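// Example (assuming quadMask() matches the spec above):
// quadMask(0x000000f0) == 0x2, since only the second quad is non-zero.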
2963 void
2964 Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
2965 {
2966 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2967 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2968 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2969
2970 src.read();
2971
2972 sdst = quadMask(src.rawData());
2973 scc = sdst.rawData() ? 1 : 0;
2974
2975 sdst.write();
2976 scc.write();
2977 }
2978
2979 Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
2980 : Inst_SOP1(iFmt, "s_quadmask_b64")
2981 {
2982 setFlag(ALU);
2983 } // Inst_SOP1__S_QUADMASK_B64
2984
2985 Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
2986 {
2987 } // ~Inst_SOP1__S_QUADMASK_B64
2988
2989 // D.u64 = QuadMask(S0.u64):
2990 // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
2991 // SCC = 1 if result is non-zero.
2992 void
2993 Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
2994 {
2995 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2996 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2997 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2998
2999 src.read();
3000
3001 sdst = quadMask(src.rawData());
3002 scc = sdst.rawData() ? 1 : 0;
3003
3004 sdst.write();
3005 scc.write();
3006 }
3007
3008 Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
3009 : Inst_SOP1(iFmt, "s_movrels_b32")
3010 {
3011 setFlag(ALU);
3012 } // Inst_SOP1__S_MOVRELS_B32
3013
3014 Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
3015 {
3016 } // ~Inst_SOP1__S_MOVRELS_B32
3017
3018 // D.u = SGPR[S0.u + M0.u].u (move from relative source).
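// For example, with M0 == 3, "s_movrels_b32 s5, s0" reads s3
// (SGPR index 0 + 3) into s5.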
3019 void
3020 Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst)
3021 {
3022 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3023 m0.read();
3024 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData());
3025 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
3026
3027 src.read();
3028
3029 sdst = src.rawData();
3030
3031 sdst.write();
3032 }
3033
3034 Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt)
3035 : Inst_SOP1(iFmt, "s_movrels_b64")
3036 {
3037 setFlag(ALU);
3038 } // Inst_SOP1__S_MOVRELS_B64
3039
3040 Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64()
3041 {
3042 } // ~Inst_SOP1__S_MOVRELS_B64
3043
3044 // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source).
3045 // The index in M0.u must be even for this operation.
3046 void
3047 Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst)
3048 {
3049 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3050 m0.read();
3051 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData());
3052 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
3053
3054 src.read();
3055
3056 sdst = src.rawData();
3057
3058 sdst.write();
3059 }
3060
3061 Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt)
3062 : Inst_SOP1(iFmt, "s_movreld_b32")
3063 {
3064 setFlag(ALU);
3065 } // Inst_SOP1__S_MOVRELD_B32
3066
3067 Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32()
3068 {
3069 } // ~Inst_SOP1__S_MOVRELD_B32
3070
3071 // SGPR[D.u + M0.u].u = S0.u (move to relative destination).
3072 void
3073 Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst)
3074 {
3075 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3076 m0.read();
3077 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
3078 ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData());
3079
3080 src.read();
3081
3082 sdst = src.rawData();
3083
3084 sdst.write();
3085 }
3086
3087 Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt)
3088 : Inst_SOP1(iFmt, "s_movreld_b64")
3089 {
3090 setFlag(ALU);
3091 } // Inst_SOP1__S_MOVRELD_B64
3092
3093 Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64()
3094 {
3095 } // ~Inst_SOP1__S_MOVRELD_B64
3096
3097 // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination).
3098 // The index in M0.u must be even for this operation.
3099 void
3100 Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst)
3101 {
3102 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3103 m0.read();
3104 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
3105 ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData());
3106
3107 src.read();
3108
3109 sdst = src.rawData();
3110
3111 sdst.write();
3112 }
3113
3114 Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt)
3115 : Inst_SOP1(iFmt, "s_cbranch_join")
3116 {
3117 setFlag(Branch);
3118 } // Inst_SOP1__S_CBRANCH_JOIN
3119
3120 Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN()
3121 {
3122 } // ~Inst_SOP1__S_CBRANCH_JOIN
3123
3124 // Conditional branch join point (end of conditional branch block).
3125 void
3126 Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst)
3127 {
3128 panicUnimplemented();
3129 }
3130
3131 Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt)
3132 : Inst_SOP1(iFmt, "s_abs_i32")
3133 {
3134 setFlag(ALU);
3135 } // Inst_SOP1__S_ABS_I32
3136
3137 Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32()
3138 {
3139 } // ~Inst_SOP1__S_ABS_I32
3140
3141 // if (S0.i < 0) then D.i = -S0.i;
3142 // else D.i = S0.i;
3143 // SCC = 1 if result is non-zero.
3144 // Integer absolute value.
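// Note: std::abs() is undefined for INT32_MIN, for which hardware is
// assumed to return 0x80000000; this model relies on that input not
// occurring.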
3145 void
3146 Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst)
3147 {
3148 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
3149 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
3150 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3151
3152 src.read();
3153
3154 sdst = std::abs(src.rawData());
3155
3156 scc = sdst.rawData() ? 1 : 0;
3157
3158 sdst.write();
3159 scc.write();
3160 }
3161
3162 Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt)
3163 : Inst_SOP1(iFmt, "s_mov_fed_b32")
3164 {
3165 setFlag(ALU);
3166 } // Inst_SOP1__S_MOV_FED_B32
3167
3168 Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32()
3169 {
3170 } // ~Inst_SOP1__S_MOV_FED_B32
3171
3172 // D.u = S0.u.
3173 void
3174 Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
3175 {
3176 panicUnimplemented();
3177 }
3178
3179 Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX(
3180 InFmt_SOP1 *iFmt)
3181 : Inst_SOP1(iFmt, "s_set_gpr_idx_idx")
3182 {
3183 } // Inst_SOP1__S_SET_GPR_IDX_IDX
3184
3185 Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX()
3186 {
3187 } // ~Inst_SOP1__S_SET_GPR_IDX_IDX
3188
3189 // M0[7:0] = S0.u[7:0].
3190 // Modify the index used in vector GPR indexing.
3191 void
3192 Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst)
3193 {
3194 panicUnimplemented();
3195 }
3196
3197 Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt)
3198 : Inst_SOPC(iFmt, "s_cmp_eq_i32")
3199 {
3200 setFlag(ALU);
3201 } // Inst_SOPC__S_CMP_EQ_I32
3202
3203 Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32()
3204 {
3205 } // ~Inst_SOPC__S_CMP_EQ_I32
3206
3207 // SCC = (S0.i == S1.i).
3208 void
3209 Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
3210 {
3211 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3212 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3213 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3214
3215 src0.read();
3216 src1.read();
3217
3218 scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
3219
3220 scc.write();
3221 }
3222
3223 Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt)
3224 : Inst_SOPC(iFmt, "s_cmp_lg_i32")
3225 {
3226 setFlag(ALU);
3227 } // Inst_SOPC__S_CMP_LG_I32
3228
3229 Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32()
3230 {
3231 } // ~Inst_SOPC__S_CMP_LG_I32
3232
3233 // SCC = (S0.i != S1.i).
3234 void
3235 Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst)
3236 {
3237 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3238 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3239 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3240
3241 src0.read();
3242 src1.read();
3243
3244 scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
3245
3246 scc.write();
3247 }
3248
3249 Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt)
3250 : Inst_SOPC(iFmt, "s_cmp_gt_i32")
3251 {
3252 setFlag(ALU);
3253 } // Inst_SOPC__S_CMP_GT_I32
3254
3255 Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32()
3256 {
3257 } // ~Inst_SOPC__S_CMP_GT_I32
3258
3259 // SCC = (S0.i > S1.i).
3260 void
3261 Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
3262 {
3263 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3264 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3265 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3266
3267 src0.read();
3268 src1.read();
3269
3270 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
3271
3272 scc.write();
3273 }
3274
3275 Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt)
3276 : Inst_SOPC(iFmt, "s_cmp_ge_i32")
3277 {
3278 setFlag(ALU);
3279 } // Inst_SOPC__S_CMP_GE_I32
3280
3281 Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32()
3282 {
3283 } // ~Inst_SOPC__S_CMP_GE_I32
3284
3285 // SCC = (S0.i >= S1.i).
3286 void
3287 Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
3288 {
3289 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3290 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3291 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3292
3293 src0.read();
3294 src1.read();
3295
3296 scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;
3297
3298 scc.write();
3299 }
3300
3301 Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt)
3302 : Inst_SOPC(iFmt, "s_cmp_lt_i32")
3303 {
3304 setFlag(ALU);
3305 } // Inst_SOPC__S_CMP_LT_I32
3306
3307 Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32()
3308 {
3309 } // ~Inst_SOPC__S_CMP_LT_I32
3310
3311 // SCC = (S0.i < S1.i).
3312 void
3313 Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
3314 {
3315 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3316 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3317 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3318
3319 src0.read();
3320 src1.read();
3321
3322 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
3323
3324 scc.write();
3325 }
3326
3327 Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt)
3328 : Inst_SOPC(iFmt, "s_cmp_le_i32")
3329 {
3330 setFlag(ALU);
3331 } // Inst_SOPC__S_CMP_LE_I32
3332
3333 Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32()
3334 {
3335 } // ~Inst_SOPC__S_CMP_LE_I32
3336
3337 // SCC = (S0.i <= S1.i).
3338 void
3339 Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
3340 {
3341 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3342 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3343 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3344
3345 src0.read();
3346 src1.read();
3347
3348 scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;
3349
3350 scc.write();
3351 }
3352
3353 Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt)
3354 : Inst_SOPC(iFmt, "s_cmp_eq_u32")
3355 {
3356 setFlag(ALU);
3357 } // Inst_SOPC__S_CMP_EQ_U32
3358
3359 Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32()
3360 {
3361 } // ~Inst_SOPC__S_CMP_EQ_U32
3362
3363 // SCC = (S0.u == S1.u).
3364 void
3365 Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
3366 {
3367 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3368 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3369 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3370
3371 src0.read();
3372 src1.read();
3373
3374 scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
3375
3376 scc.write();
3377 }
3378
3379 Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt)
3380 : Inst_SOPC(iFmt, "s_cmp_lg_u32")
3381 {
3382 setFlag(ALU);
3383 } // Inst_SOPC__S_CMP_LG_U32
3384
3385 Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32()
3386 {
3387 } // ~Inst_SOPC__S_CMP_LG_U32
3388
3389 // SCC = (S0.u != S1.u).
3390 void
3391 Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst)
3392 {
3393 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3394 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3395 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3396
3397 src0.read();
3398 src1.read();
3399
3400 scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
3401
3402 scc.write();
3403 }
3404
3405 Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt)
3406 : Inst_SOPC(iFmt, "s_cmp_gt_u32")
3407 {
3408 setFlag(ALU);
3409 } // Inst_SOPC__S_CMP_GT_U32
3410
3411 Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32()
3412 {
3413 } // ~Inst_SOPC__S_CMP_GT_U32
3414
3415 // SCC = (S0.u > S1.u).
3416 void
3417 Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
3418 {
3419 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3420 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3421 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3422
3423 src0.read();
3424 src1.read();
3425
3426 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
3427
3428 scc.write();
3429 }
3430
3431 Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt)
3432 : Inst_SOPC(iFmt, "s_cmp_ge_u32")
3433 {
3434 setFlag(ALU);
3435 } // Inst_SOPC__S_CMP_GE_U32
3436
3437 Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32()
3438 {
3439 } // ~Inst_SOPC__S_CMP_GE_U32
3440
3441 // SCC = (S0.u >= S1.u).
3442 void
3443 Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
3444 {
3445 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3446 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3447 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3448
3449 src0.read();
3450 src1.read();
3451
3452 scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;
3453
3454 scc.write();
3455 }
3456
3457 Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt)
3458 : Inst_SOPC(iFmt, "s_cmp_lt_u32")
3459 {
3460 setFlag(ALU);
3461 } // Inst_SOPC__S_CMP_LT_U32
3462
3463 Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32()
3464 {
3465 } // ~Inst_SOPC__S_CMP_LT_U32
3466
3467 // SCC = (S0.u < S1.u).
3468 void
3469 Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
3470 {
3471 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3472 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3473 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3474
3475 src0.read();
3476 src1.read();
3477
3478 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
3479
3480 scc.write();
3481 }
3482
3483 Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt)
3484 : Inst_SOPC(iFmt, "s_cmp_le_u32")
3485 {
3486 setFlag(ALU);
3487 } // Inst_SOPC__S_CMP_LE_U32
3488
3489 Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32()
3490 {
3491 } // ~Inst_SOPC__S_CMP_LE_U32
3492
3493 // SCC = (S0.u <= S1.u).
3494 void
3495 Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
3496 {
3497 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3498 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3499 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3500
3501 src0.read();
3502 src1.read();
3503
3504 scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;
3505
3506 scc.write();
3507 }
3508
3509 Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt)
3510 : Inst_SOPC(iFmt, "s_bitcmp0_b32")
3511 {
3512 setFlag(ALU);
3513 } // Inst_SOPC__S_BITCMP0_B32
3514
3515 Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32()
3516 {
3517 } // ~Inst_SOPC__S_BITCMP0_B32
3518
3519 // SCC = (S0.u[S1.u[4:0]] == 0).
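// For example, with S0 == 0x8 and S1 == 3, bit 3 of S0 is 1, so
// SCC = 0.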
3520 void
3521 Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst)
3522 {
3523 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3524 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3525 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3526
3527 src0.read();
3528 src1.read();
3529
3530 scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;
3531
3532 scc.write();
3533 }
3534
3535 Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt)
3536 : Inst_SOPC(iFmt, "s_bitcmp1_b32")
3537 {
3538 setFlag(ALU);
3539 } // Inst_SOPC__S_BITCMP1_B32
3540
3541 Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32()
3542 {
3543 } // ~Inst_SOPC__S_BITCMP1_B32
3544
3545 // SCC = (S0.u[S1.u[4:0]] == 1).
3546 void
3547 Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst)
3548 {
3549 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3550 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3551 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3552
3553 src0.read();
3554 src1.read();
3555
3556 scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;
3557
3558 scc.write();
3559 }
3560
3561 Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt)
3562 : Inst_SOPC(iFmt, "s_bitcmp0_b64")
3563 {
3564 setFlag(ALU);
3565 } // Inst_SOPC__S_BITCMP0_B64
3566
3567 Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64()
3568 {
3569 } // ~Inst_SOPC__S_BITCMP0_B64
3570
3571 // SCC = (S0.u64[S1.u[5:0]] == 0).
3572 void
3573 Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst)
3574 {
3575 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
3576 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3577 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3578
3579 src0.read();
3580 src1.read();
3581
3582 scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;
3583
3584 scc.write();
3585 }
3586
3587 Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt)
3588 : Inst_SOPC(iFmt, "s_bitcmp1_b64")
3589 {
3590 setFlag(ALU);
3591 } // Inst_SOPC__S_BITCMP1_B64
3592
3593 Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64()
3594 {
3595 } // ~Inst_SOPC__S_BITCMP1_B64
3596
3597 // SCC = (S0.u64[S1.u[5:0]] == 1).
3598 void
3599 Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst)
3600 {
3601 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
3602 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3603 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3604
3605 src0.read();
3606 src1.read();
3607
3608 scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;
3609
3610 scc.write();
3611 }
3612
3613 Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt)
3614 : Inst_SOPC(iFmt, "s_setvskip")
3615 {
3616 setFlag(UnconditionalJump);
3617 } // Inst_SOPC__S_SETVSKIP
3618
3619 Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP()
3620 {
3621 } // ~Inst_SOPC__S_SETVSKIP
3622
3623 // VSKIP = S0.u[S1.u[4:0]].
3624 // Enables or disables VSKIP mode.
3625 // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instructions are
3626 // issued.
3627 void
3628 Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst)
3629 {
3630 panicUnimplemented();
3631 }
3632
3633 Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt)
3634 : Inst_SOPC(iFmt, "s_set_gpr_idx_on")
3635 {
3636 } // Inst_SOPC__S_SET_GPR_IDX_ON
3637
3638 Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON()
3639 {
3640 } // ~Inst_SOPC__S_SET_GPR_IDX_ON
3641
3642 // MODE.gpr_idx_en = 1;
3643 // M0[7:0] = S0.u[7:0];
3644 // M0[15:12] = SIMM4 (direct contents of S1 field);
3645 // Remaining bits of M0 are unmodified.
3646 // Enable GPR indexing mode. Vector operations after this will perform
3647 // relative GPR addressing based on the contents of M0.
3648 // The raw contents of the S1 field are read and used to set the enable
3649 // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and
3650 // S1[3] = VDST_REL.
3651 void
3652 Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst)
3653 {
3654 panicUnimplemented();
3655 }
3656
3657 Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt)
3658 : Inst_SOPC(iFmt, "s_cmp_eq_u64")
3659 {
3660 setFlag(ALU);
3661 } // Inst_SOPC__S_CMP_EQ_U64
3662
3663 Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64()
3664 {
3665 } // ~Inst_SOPC__S_CMP_EQ_U64
3666
3667 // SCC = (S0.u64 == S1.u64).
3668 void
3669 Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
3670 {
3671 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
3672 ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
3673 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3674
3675 src0.read();
3676 src1.read();
3677
3678 scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
3679
3680 scc.write();
3681 }
3682
3683 Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt)
3684 : Inst_SOPC(iFmt, "s_cmp_lg_u64")
3685 {
3686 setFlag(ALU);
3687 } // Inst_SOPC__S_CMP_LG_U64
3688
3689 Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64()
3690 {
3691 } // ~Inst_SOPC__S_CMP_LG_U64
3692
3693 // SCC = (S0.u64 != S1.u64).
3694 void
3695 Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst)
3696 {
3697 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
3698 ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
3699 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3700
3701 src0.read();
3702 src1.read();
3703
3704 scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
3705
3706 scc.write();
3707 }
3708
3709 Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt)
3710 : Inst_SOPP(iFmt, "s_nop")
3711 {
3712 setFlag(Nop);
3713 } // Inst_SOPP__S_NOP
3714
3715 Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP()
3716 {
3717 } // ~Inst_SOPP__S_NOP
3718
3719 // Do nothing.
3720 void
3721 Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst)
3722 {
3723 }
3724
3725 Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt)
3726 : Inst_SOPP(iFmt, "s_endpgm")
3727 {
3728 setFlag(EndOfKernel);
3729 } // Inst_SOPP__S_ENDPGM
3730
3731 Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM()
3732 {
3733 } // ~Inst_SOPP__S_ENDPGM
3734
3735 // End of program; terminate wavefront.
3736 void
3737 Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
3738 {
3739 Wavefront *wf = gpuDynInst->wavefront();
3740 ComputeUnit *cu = gpuDynInst->computeUnit();
3741
3742 // delete extra instructions fetched for completed work-items
3743 wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
3744 wf->instructionBuffer.end());
3745
3746 if (wf->pendingFetch) {
3747 wf->dropFetch = true;
3748 }
3749
3750 wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
3751 .flushBuf(wf->wfSlotId);
3752 wf->setStatus(Wavefront::S_STOPPED);
3753
3754 int refCount = wf->computeUnit->getLds()
3755 .decreaseRefCounter(wf->dispatchId, wf->wgId);
3756
3757 /**
3758 * The parent WF of this instruction is exiting, therefore
3759 * it should not participate in this barrier any longer. This
3760 * prevents possible deadlock issues if WFs exit early.
3761 */
3762 int bar_id = WFBarrier::InvalidID;
3763 if (wf->hasBarrier()) {
3764 assert(wf->getStatus() != Wavefront::S_BARRIER);
3765 bar_id = wf->barrierId();
3766 assert(bar_id != WFBarrier::InvalidID);
3767 wf->releaseBarrier();
3768 cu->decMaxBarrierCnt(bar_id);
3769 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
3770 "program and decrementing max barrier count for "
3771 "barrier Id%d. New max count: %d.\n", cu->cu_id,
3772 wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
3773 cu->maxBarrierCnt(bar_id));
3774 }
3775
3776 DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
3777 wf->computeUnit->cu_id, wf->wgId, refCount);
3778
3779 wf->computeUnit->registerManager->freeRegisters(wf);
3780 wf->computeUnit->completedWfs++;
3781 wf->computeUnit->activeWaves--;
3782
3783 panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
3784 "than zero\n", wf->computeUnit->cu_id);
3785
3786 DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
3787 wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);
3788
3789 for (int i = 0; i < wf->vecReads.size(); i++) {
3790 if (wf->rawDist.find(i) != wf->rawDist.end()) {
3791 wf->readsPerWrite.sample(wf->vecReads.at(i));
3792 }
3793 }
3794 wf->vecReads.clear();
3795 wf->rawDist.clear();
3796 wf->lastInstExec = 0;
3797
3798 if (!refCount) {
3799 /**
3800 * If all WFs have finished, and hence the WG has finished,
3801 * then we can free up the barrier belonging to the parent
3802 * WG, but only if we actually used a barrier (i.e., more
3803 * than one WF in the WG).
3804 */
3805 if (bar_id != WFBarrier::InvalidID) {
3806 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
3807 "now complete. Releasing barrier Id%d.\n", cu->cu_id,
3808 wf->simdId, wf->wfSlotId, wf->wfDynId,
3809 wf->barrierId());
3810 cu->releaseBarrier(bar_id);
3811 }
3812
3813 /**
3814 * Last wavefront of the workgroup has executed return. If the
3815 * workgroup is not the final one in the kernel, then simply
3816 * retire it; however, if it is the final one (i.e., indicating
3817 * the kernel end) then a release operation is needed.
3818 */
3819
3820 // check whether the workgroup is indicating the kernel end (i.e.,
3821 // the last workgroup in the kernel).
3822 bool kernelEnd =
3823 wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
3824 // further check whether 'release @ kernel end' is needed
3825 bool relNeeded =
3826 wf->computeUnit->shader->impl_kern_end_rel;
3827
3828 // if not a kernel end or no release needed, retire the workgroup
3829 // directly
3830 if (!kernelEnd || !relNeeded) {
3831 wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
3832 wf->setStatus(Wavefront::S_STOPPED);
3833 wf->computeUnit->completedWGs++;
3834
3835 return;
3836 }
3837
3838 /**
3839 * If this is a kernel end and a release is needed, inject a memory
3840 * sync and retire the workgroup after receiving all acks.
3841 */
3842 setFlag(MemSync);
3843 setFlag(GlobalSegment);
3844 // Notify Memory System of Kernel Completion
3845 wf->setStatus(Wavefront::S_RETURNING);
3846 gpuDynInst->simdId = wf->simdId;
3847 gpuDynInst->wfSlotId = wf->wfSlotId;
3848 gpuDynInst->wfDynId = wf->wfDynId;
3849
3850 DPRINTF(GPUExec, "inject global memory fence for CU%d: "
3851 "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
3852 wf->simdId, wf->wfSlotId, wf->wfDynId);
3853
3854 // call shader to prepare the flush operations
3855 wf->computeUnit->shader->prepareFlush(gpuDynInst);
3856
3857 wf->computeUnit->completedWGs++;
3858 } else {
3859 wf->computeUnit->shader->dispatcher().scheduleDispatch();
3860 }
3861 }
3862
3863
3864 Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
3865 : Inst_SOPP(iFmt, "s_branch")
3866 {
3867 setFlag(Branch);
3868 } // Inst_SOPP__S_BRANCH
3869
3870 Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
3871 {
3872 } // ~Inst_SOPP__S_BRANCH
3873
3874 // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
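// For example, SIMM16 == -1 gives PC = PC + (-4) + 4, a branch back to
// this s_branch itself; SIMM16 == 0 falls through to PC + 4.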
3875 void
3876 Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
3877 {
3878 Wavefront *wf = gpuDynInst->wavefront();
3879 Addr pc = wf->pc();
3880 ScalarRegI16 simm16 = instData.SIMM16;
3881
3882 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3883
3884 wf->pc(pc);
3885 }
3886
3887 Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
3888 : Inst_SOPP(iFmt, "s_wakeup")
3889 {
3890 } // Inst_SOPP__S_WAKEUP
3891
3892 Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
3893 {
3894 } // ~Inst_SOPP__S_WAKEUP
3895
3896 // Allow a wave to wake up all the other waves in its workgroup to force
3897 // them to wake up immediately from an S_SLEEP instruction. The wakeup is
3898 // ignored if the waves are not sleeping.
3899 void
3900 Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
3901 {
3902 panicUnimplemented();
3903 }
3904
3905 Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
3906 : Inst_SOPP(iFmt, "s_cbranch_scc0")
3907 {
3908 setFlag(Branch);
3909 } // Inst_SOPP__S_CBRANCH_SCC0
3910
3911 Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
3912 {
3913 } // ~Inst_SOPP__S_CBRANCH_SCC0
3914
3915 // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
3916 // else NOP.
3917 void
3918 Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst)
3919 {
3920 Wavefront *wf = gpuDynInst->wavefront();
3921 Addr pc = wf->pc();
3922 ScalarRegI16 simm16 = instData.SIMM16;
3923 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
3924
3925 scc.read();
3926
3927 if (!scc.rawData()) {
3928 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3929 }
3930
3931 wf->pc(pc);
3932 }
3933
3934 Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt)
3935 : Inst_SOPP(iFmt, "s_cbranch_scc1")
3936 {
3937 setFlag(Branch);
3938 } // Inst_SOPP__S_CBRANCH_SCC1
3939
3940 Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1()
3941 {
3942 } // ~Inst_SOPP__S_CBRANCH_SCC1
3943
3944 // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
3945 // else NOP.
3946 void
3947 Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst)
3948 {
3949 Wavefront *wf = gpuDynInst->wavefront();
3950 Addr pc = wf->pc();
3951 ScalarRegI16 simm16 = instData.SIMM16;
3952 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
3953
3954 scc.read();
3955
3956 if (scc.rawData()) {
3957 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3958 }
3959
3960 wf->pc(pc);
3961 }
3962
3963 Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt)
3964 : Inst_SOPP(iFmt, "s_cbranch_vccz")
3965 {
3966 setFlag(Branch);
3967 setFlag(ReadsVCC);
3968 } // Inst_SOPP__S_CBRANCH_VCCZ
3969
3970 Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ()
3971 {
3972 } // ~Inst_SOPP__S_CBRANCH_VCCZ
3973
3974 // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
3975 // else NOP.
3976 void
3977 Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
3978 {
3979 Wavefront *wf = gpuDynInst->wavefront();
3980 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
3981 Addr pc = wf->pc();
3982 ScalarRegI16 simm16 = instData.SIMM16;
3983
3984 vcc.read();
3985
3986 if (!vcc.rawData()) {
3987 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3988 }
3989
3990 wf->pc(pc);
3991 }
3992
3993 Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
3994 : Inst_SOPP(iFmt, "s_cbranch_vccnz")
3995 {
3996 setFlag(Branch);
3997 setFlag(ReadsVCC);
3998 } // Inst_SOPP__S_CBRANCH_VCCNZ
3999
4000 Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
4001 {
4002 } // ~Inst_SOPP__S_CBRANCH_VCCNZ
4003
4004 // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
4005 // else NOP.
4006 void
4007 Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
4008 {
4009 Wavefront *wf = gpuDynInst->wavefront();
4010 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
4011
4012 vcc.read();
4013
4014 if (vcc.rawData()) {
4015 Addr pc = wf->pc();
4016 ScalarRegI16 simm16 = instData.SIMM16;
4017 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
4018 wf->pc(pc);
4019 }
4020 }
4021
4022 Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
4023 : Inst_SOPP(iFmt, "s_cbranch_execz")
4024 {
4025 setFlag(Branch);
4026 } // Inst_SOPP__S_CBRANCH_EXECZ
4027
4028 Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
4029 {
4030 } // ~Inst_SOPP__S_CBRANCH_EXECZ
4031
4032 // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
4033 // else NOP.
4034 void
4035 Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst)
4036 {
4037 Wavefront *wf = gpuDynInst->wavefront();
4038
4039 if (wf->execMask().none()) {
4040 Addr pc = wf->pc();
4041 ScalarRegI16 simm16 = instData.SIMM16;
4042 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
4043 wf->pc(pc);
4044 }
4045 }
4046
4047 Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt)
4048 : Inst_SOPP(iFmt, "s_cbranch_execnz")
4049 {
4050 setFlag(Branch);
4051 } // Inst_SOPP__S_CBRANCH_EXECNZ
4052
4053 Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ()
4054 {
4055 } // ~Inst_SOPP__S_CBRANCH_EXECNZ
4056
4057 // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
4058 // else NOP.
4059 void
4060 Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst)
4061 {
4062 Wavefront *wf = gpuDynInst->wavefront();
4063
4064 if (wf->execMask().any()) {
4065 Addr pc = wf->pc();
4066 ScalarRegI16 simm16 = instData.SIMM16;
4067 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
4068 wf->pc(pc);
4069 }
4070 }
4071
4072 Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt)
4073 : Inst_SOPP(iFmt, "s_barrier")
4074 {
4075 setFlag(MemBarrier);
4076 } // Inst_SOPP__S_BARRIER
4077
4078 Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER()
4079 {
4080 } // ~Inst_SOPP__S_BARRIER
4081
4082 /**
4083 * Synchronize waves within a workgroup. If not all waves of the workgroup
4084      * have been created yet, wait for the entire group before proceeding.
4085      * If some waves in the workgroup have already terminated, this waits
4086      * only on the surviving waves.
4087 */
4088 void
4089 Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
4090 {
4091 Wavefront *wf = gpuDynInst->wavefront();
4092 ComputeUnit *cu = gpuDynInst->computeUnit();
4093
4094 if (wf->hasBarrier()) {
4095 int bar_id = wf->barrierId();
4096 assert(wf->getStatus() != Wavefront::S_BARRIER);
4097 wf->setStatus(Wavefront::S_BARRIER);
4098 cu->incNumAtBarrier(bar_id);
4099 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
4100 "barrier Id%d. %d waves now at barrier, %d waves "
4101 "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
4102 wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
4103 cu->numYetToReachBarrier(bar_id));
4104 }
4105 } // execute
4106 // --- Inst_SOPP__S_SETKILL class methods ---
4107
4108 Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
4109 : Inst_SOPP(iFmt, "s_setkill")
4110 {
4111 } // Inst_SOPP__S_SETKILL
4112
4113 Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
4114 {
4115 } // ~Inst_SOPP__S_SETKILL
4116
4117 void
4118 Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
4119 {
4120 panicUnimplemented();
4121 }
4122
4123 Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
4124 : Inst_SOPP(iFmt, "s_waitcnt")
4125 {
4126 setFlag(ALU);
4127 setFlag(Waitcnt);
4128 } // Inst_SOPP__S_WAITCNT
4129
4130 Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
4131 {
4132 } // ~Inst_SOPP__S_WAITCNT
4133
4134 // Wait for the counts of outstanding lds, vector-memory and
4135 // export/vmem-write-data to be at or below the specified levels.
4136 // SIMM16[3:0] = vmcount (vector memory operations),
4137 // SIMM16[6:4] = export/mem-write-data count,
4138 // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
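    // A worked decode (illustrative values): s_waitcnt vmcnt(0) lgkmcnt(0)
    // encodes as SIMM16 = 0x0070 -- bits 3:0 = 0 (drain all vector-memory
    // ops), bits 6:4 = 7 (the 3-bit maximum, i.e. do not wait on exports)
    // and bits 12:8 = 0 (drain all scalar-mem/GDS/LDS ops).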
4139 void
4140 Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
4141 {
4142 ScalarRegI32 vm_cnt = 0;
4143 ScalarRegI32 exp_cnt = 0;
4144 ScalarRegI32 lgkm_cnt = 0;
4145 vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0);
4146 exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4);
4147 lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8);
4148 gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
4149 }
4150
4151 Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
4152 : Inst_SOPP(iFmt, "s_sethalt")
4153 {
4154 } // Inst_SOPP__S_SETHALT
4155
4156 Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
4157 {
4158 } // ~Inst_SOPP__S_SETHALT
4159
4160 void
4161 Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
4162 {
4163 panicUnimplemented();
4164 }
4165
4166 Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
4167 : Inst_SOPP(iFmt, "s_sleep")
4168 {
4169 } // Inst_SOPP__S_SLEEP
4170
4171 Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
4172 {
4173 } // ~Inst_SOPP__S_SLEEP
4174
4175 // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
4176 void
4177 Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
4178 {
4179 panicUnimplemented();
4180 }
4181
4182 Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
4183 : Inst_SOPP(iFmt, "s_setprio")
4184 {
4185 } // Inst_SOPP__S_SETPRIO
4186
4187 Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
4188 {
4189 } // ~Inst_SOPP__S_SETPRIO
4190
4191     // User-settable wave priority is set to SIMM16[1:0]. 0 = lowest,
4192 // 3 = highest.
4193 void
4194 Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
4195 {
4196 panicUnimplemented();
4197 }
4198
4199 Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
4200 : Inst_SOPP(iFmt, "s_sendmsg")
4201 {
4202 } // Inst_SOPP__S_SENDMSG
4203
4204 Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
4205 {
4206 } // ~Inst_SOPP__S_SENDMSG
4207
4208 void
4209 Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst)
4210 {
4211 panicUnimplemented();
4212 }
4213
4214 Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt)
4215 : Inst_SOPP(iFmt, "s_sendmsghalt")
4216 {
4217 } // Inst_SOPP__S_SENDMSGHALT
4218
4219 Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT()
4220 {
4221 } // ~Inst_SOPP__S_SENDMSGHALT
4222
4223 void
4224 Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst)
4225 {
4226 panicUnimplemented();
4227 }
4228
4229 Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt)
4230 : Inst_SOPP(iFmt, "s_trap")
4231 {
4232 } // Inst_SOPP__S_TRAP
4233
4234 Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP()
4235 {
4236 } // ~Inst_SOPP__S_TRAP
4237
4238 // Enter the trap handler.
4239 void
4240 Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst)
4241 {
4242 panicUnimplemented();
4243 }
4244
4245 Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
4246 : Inst_SOPP(iFmt, "s_icache_inv")
4247 {
4248 } // Inst_SOPP__S_ICACHE_INV
4249
4250 Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
4251 {
4252 } // ~Inst_SOPP__S_ICACHE_INV
4253
4254 // Invalidate entire L1 instruction cache.
4255 void
4256 Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
4257 {
4258 panicUnimplemented();
4259 }
4260
4261 Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
4262 : Inst_SOPP(iFmt, "s_incperflevel")
4263 {
4264 } // Inst_SOPP__S_INCPERFLEVEL
4265
4266 Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
4267 {
4268 } // ~Inst_SOPP__S_INCPERFLEVEL
4269
4270 void
4271 Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
4272 {
4273 panicUnimplemented();
4274 }
4275
4276 Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
4277 : Inst_SOPP(iFmt, "s_decperflevel")
4278 {
4279 } // Inst_SOPP__S_DECPERFLEVEL
4280
4281 Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
4282 {
4283 } // ~Inst_SOPP__S_DECPERFLEVEL
4284
4285 void
4286 Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
4287 {
4288 panicUnimplemented();
4289 }
4290
4291 Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
4292 : Inst_SOPP(iFmt, "s_ttracedata")
4293 {
4294 } // Inst_SOPP__S_TTRACEDATA
4295
4296 Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
4297 {
4298 } // ~Inst_SOPP__S_TTRACEDATA
4299
4300 void
4301 Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
4302 {
4303 panicUnimplemented();
4304 }
4305
4306 Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
4307 InFmt_SOPP *iFmt)
4308 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
4309 {
4310 setFlag(Branch);
4311 } // Inst_SOPP__S_CBRANCH_CDBGSYS
4312
4313 Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
4314 {
4315 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS
4316
4317 void
4318 Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
4319 {
4320 panicUnimplemented();
4321 }
4322
4323 Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
4324 InFmt_SOPP *iFmt)
4325 : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
4326 {
4327 setFlag(Branch);
4328 } // Inst_SOPP__S_CBRANCH_CDBGUSER
4329
4330 Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
4331 {
4332 } // ~Inst_SOPP__S_CBRANCH_CDBGUSER
4333
4334 void
4335 Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
4336 {
4337 panicUnimplemented();
4338 }
4339
4340 Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(
4341 InFmt_SOPP *iFmt)
4342 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
4343 {
4344 setFlag(Branch);
4345 } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
4346
4347 Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
4348 ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
4349 {
4350 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
4351
4352 void
4353 Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
4354 {
4355 panicUnimplemented();
4356 }
4357
4358 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
4359 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
4360 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
4361 {
4362 setFlag(Branch);
4363 } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
4364
4365 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
4366 ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
4367 {
4368 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
4369
4370 void
4371 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
4372 {
4373 panicUnimplemented();
4374 }
4375
4376 Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
4377 : Inst_SOPP(iFmt, "s_endpgm_saved")
4378 {
4379 } // Inst_SOPP__S_ENDPGM_SAVED
4380
4381 Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
4382 {
4383 } // ~Inst_SOPP__S_ENDPGM_SAVED
4384
4385 // End of program.
4386 void
4387 Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
4388 {
4389 panicUnimplemented();
4390 }
4391
4392 Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
4393 InFmt_SOPP *iFmt)
4394 : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
4395 {
4396 } // Inst_SOPP__S_SET_GPR_IDX_OFF
4397
4398 Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
4399 {
4400 } // ~Inst_SOPP__S_SET_GPR_IDX_OFF
4401
4402 // MODE.gpr_idx_en = 0.
4403 // Clear GPR indexing mode. Vector operations after this will not perform
4404 // relative GPR addressing regardless of the contents of M0.
4405 void
4406 Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
4407 {
4408 panicUnimplemented();
4409 }
4410
4411 Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
4412 InFmt_SOPP *iFmt)
4413 : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
4414 {
4415 } // Inst_SOPP__S_SET_GPR_IDX_MODE
4416
4417 Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
4418 {
4419 } // ~Inst_SOPP__S_SET_GPR_IDX_MODE
4420
4421 // M0[15:12] = SIMM4.
4422 // Modify the mode used for vector GPR indexing.
4423 // The raw contents of the source field are read and used to set the enable
4424 // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL
4425 // and SIMM4[3] = VDST_REL.
4426 void
4427 Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
4428 {
4429 panicUnimplemented();
4430 }
4431
4432 Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt)
4433 : Inst_SMEM(iFmt, "s_load_dword")
4434 {
4435 setFlag(MemoryRef);
4436 setFlag(Load);
4437 } // Inst_SMEM__S_LOAD_DWORD
4438
4439 Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD()
4440 {
4441 } // ~Inst_SMEM__S_LOAD_DWORD
4442
4443 /**
4444 * Read 1 dword from scalar data cache. If the offset is specified as an
4445 * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are
4446 * ignored). If the offset is specified as an immediate 20-bit constant,
4447 * the constant is an unsigned byte offset.
4448 */
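    // An illustrative encoding: instData.SBASE counts aligned SGPR pairs, so
    // SBASE = 2 selects s[4:5] as the 64-bit base address (hence the
    // "SBASE << 1" when constructing the operand below); with IMM = 1 and
    // OFFSET = 0x10 the request targets base + 0x10 bytes.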
4449 void
4450 Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
4451 {
4452 Wavefront *wf = gpuDynInst->wavefront();
4453 gpuDynInst->execUnitId = wf->execUnitId;
4454 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4455 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4456 ScalarRegU32 offset(0);
4457 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4458
4459 addr.read();
4460
4461 if (instData.IMM) {
4462 offset = extData.OFFSET;
4463 } else {
4464 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4465 off_sgpr.read();
4466 offset = off_sgpr.rawData();
4467 }
4468
4469 calcAddr(gpuDynInst, addr, offset);
4470
4471 gpuDynInst->computeUnit()->scalarMemoryPipe
4472 .getGMReqFIFO().push(gpuDynInst);
4473
4474 wf->scalarRdGmReqsInPipe--;
4475 wf->scalarOutstandingReqsRdGm++;
4476 gpuDynInst->wavefront()->outstandingReqs++;
4477 gpuDynInst->wavefront()->validateRequestCounters();
4478 }
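    // The counter updates above follow the pattern shared by all SMEM
    // accesses in this file: the in-pipe count (presumably incremented
    // earlier in the issue path) drops once the request is handed to the
    // scalar GM request FIFO, while the outstanding-request counts stay
    // elevated until the access completes; validateRequestCounters()
    // sanity-checks that bookkeeping.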
4479
4480 void
4481 Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
4482 {
4483 initMemRead<1>(gpuDynInst);
4484 } // initiateAcc
4485
4486 void
4487 Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
4488 {
4489 ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
4490 sdst.write();
4491 } // completeAcc
4492
4493 Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt)
4494 : Inst_SMEM(iFmt, "s_load_dwordx2")
4495 {
4496 setFlag(MemoryRef);
4497 setFlag(Load);
4498 } // Inst_SMEM__S_LOAD_DWORDX2
4499
4500 Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2()
4501 {
4502 } // ~Inst_SMEM__S_LOAD_DWORDX2
4503
4504 /**
4505 * Read 2 dwords from scalar data cache. See s_load_dword for details on
4506 * the offset input.
4507 */
4508 void
4509 Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
4510 {
4511 Wavefront *wf = gpuDynInst->wavefront();
4512 gpuDynInst->execUnitId = wf->execUnitId;
4513 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4514 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4515 ScalarRegU32 offset(0);
4516 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4517
4518 addr.read();
4519
4520 if (instData.IMM) {
4521 offset = extData.OFFSET;
4522 } else {
4523 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4524 off_sgpr.read();
4525 offset = off_sgpr.rawData();
4526 }
4527
4528 calcAddr(gpuDynInst, addr, offset);
4529
4530 gpuDynInst->computeUnit()->scalarMemoryPipe.
4531 getGMReqFIFO().push(gpuDynInst);
4532
4533 wf->scalarRdGmReqsInPipe--;
4534 wf->scalarOutstandingReqsRdGm++;
4535 gpuDynInst->wavefront()->outstandingReqs++;
4536 gpuDynInst->wavefront()->validateRequestCounters();
4537 }
4538
4539 void
4540 Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
4541 {
4542 initMemRead<2>(gpuDynInst);
4543 } // initiateAcc
4544
4545 void
4546 Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
4547 {
4548 ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
4549 sdst.write();
4550 } // completeAcc
4551
4552 Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
4553 : Inst_SMEM(iFmt, "s_load_dwordx4")
4554 {
4555 setFlag(MemoryRef);
4556 setFlag(Load);
4557 } // Inst_SMEM__S_LOAD_DWORDX4
4558
4559 Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
4560 {
4561 } // ~Inst_SMEM__S_LOAD_DWORDX4
4562
4563 // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
4564 // the offset input.
4565 void
4566 Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
4567 {
4568 Wavefront *wf = gpuDynInst->wavefront();
4569 gpuDynInst->execUnitId = wf->execUnitId;
4570 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4571 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4572 ScalarRegU32 offset(0);
4573 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4574
4575 addr.read();
4576
4577 if (instData.IMM) {
4578 offset = extData.OFFSET;
4579 } else {
4580 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4581 off_sgpr.read();
4582 offset = off_sgpr.rawData();
4583 }
4584
4585 calcAddr(gpuDynInst, addr, offset);
4586
4587 gpuDynInst->computeUnit()->scalarMemoryPipe.
4588 getGMReqFIFO().push(gpuDynInst);
4589
4590 wf->scalarRdGmReqsInPipe--;
4591 wf->scalarOutstandingReqsRdGm++;
4592 gpuDynInst->wavefront()->outstandingReqs++;
4593 gpuDynInst->wavefront()->validateRequestCounters();
4594 }
4595
4596 void
4597 Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
4598 {
4599 initMemRead<4>(gpuDynInst);
4600 } // initiateAcc
4601
4602 void
4603 Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
4604 {
4605 ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
4606 sdst.write();
4607 } // completeAcc
4608
4609 Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
4610 : Inst_SMEM(iFmt, "s_load_dwordx8")
4611 {
4612 setFlag(MemoryRef);
4613 setFlag(Load);
4614 } // Inst_SMEM__S_LOAD_DWORDX8
4615
4616 Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
4617 {
4618 } // ~Inst_SMEM__S_LOAD_DWORDX8
4619
4620 // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
4621 // the offset input.
4622 void
4623 Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
4624 {
4625 Wavefront *wf = gpuDynInst->wavefront();
4626 gpuDynInst->execUnitId = wf->execUnitId;
4627 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4628 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4629 ScalarRegU32 offset(0);
4630 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4631
4632 addr.read();
4633
4634 if (instData.IMM) {
4635 offset = extData.OFFSET;
4636 } else {
4637 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4638 off_sgpr.read();
4639 offset = off_sgpr.rawData();
4640 }
4641
4642 calcAddr(gpuDynInst, addr, offset);
4643
4644 gpuDynInst->computeUnit()->scalarMemoryPipe.
4645 getGMReqFIFO().push(gpuDynInst);
4646
4647 wf->scalarRdGmReqsInPipe--;
4648 wf->scalarOutstandingReqsRdGm++;
4649 gpuDynInst->wavefront()->outstandingReqs++;
4650 gpuDynInst->wavefront()->validateRequestCounters();
4651 }
4652
4653 void
4654 Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
4655 {
4656 initMemRead<8>(gpuDynInst);
4657 } // initiateAcc
4658
4659 void
4660 Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
4661 {
4662 ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
4663 sdst.write();
4664 } // completeAcc
4665
4666 Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
4667 : Inst_SMEM(iFmt, "s_load_dwordx16")
4668 {
4669 setFlag(MemoryRef);
4670 setFlag(Load);
4671 } // Inst_SMEM__S_LOAD_DWORDX16
4672
4673 Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
4674 {
4675 } // ~Inst_SMEM__S_LOAD_DWORDX16
4676
4677 // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
4678 // the offset input.
4679 void
4680 Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
4681 {
4682 Wavefront *wf = gpuDynInst->wavefront();
4683 gpuDynInst->execUnitId = wf->execUnitId;
4684 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4685 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4686 ScalarRegU32 offset(0);
4687 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4688
4689 addr.read();
4690
4691 if (instData.IMM) {
4692 offset = extData.OFFSET;
4693 } else {
4694 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4695 off_sgpr.read();
4696 offset = off_sgpr.rawData();
4697 }
4698
4699 calcAddr(gpuDynInst, addr, offset);
4700
4701 gpuDynInst->computeUnit()->scalarMemoryPipe.
4702 getGMReqFIFO().push(gpuDynInst);
4703
4704 wf->scalarRdGmReqsInPipe--;
4705 wf->scalarOutstandingReqsRdGm++;
4706 gpuDynInst->wavefront()->outstandingReqs++;
4707 gpuDynInst->wavefront()->validateRequestCounters();
4708 }
4709
4710 void
4711 Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
4712 {
4713 initMemRead<16>(gpuDynInst);
4714 } // initiateAcc
4715
4716 void
4717 Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
4718 {
4719 ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
4720 sdst.write();
4721 } // completeAcc
4722
4723 Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
4724 InFmt_SMEM *iFmt)
4725 : Inst_SMEM(iFmt, "s_buffer_load_dword")
4726 {
4727 setFlag(MemoryRef);
4728 setFlag(Load);
4729 } // Inst_SMEM__S_BUFFER_LOAD_DWORD
4730
4731 Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
4732 {
4733 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD
4734
4735 // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the
4736 // offset input.
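    // Note the contrast with s_load_dword: for the buffer forms SBASE names
    // an aligned group of 4 SGPRs holding a 128-bit buffer resource
    // descriptor (read below as a U128 operand, with no "<< 1" scaling)
    // rather than a bare 64-bit address.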
4737 void
4738 Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
4739 {
4740 Wavefront *wf = gpuDynInst->wavefront();
4741 gpuDynInst->execUnitId = wf->execUnitId;
4742 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4743 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4744 ScalarRegU32 offset(0);
4745 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4746
4747 rsrcDesc.read();
4748
4749 if (instData.IMM) {
4750 offset = extData.OFFSET;
4751 } else {
4752 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4753 off_sgpr.read();
4754 offset = off_sgpr.rawData();
4755 }
4756
4757 calcAddr(gpuDynInst, rsrcDesc, offset);
4758
4759 gpuDynInst->computeUnit()->scalarMemoryPipe
4760 .getGMReqFIFO().push(gpuDynInst);
4761
4762 wf->scalarRdGmReqsInPipe--;
4763 wf->scalarOutstandingReqsRdGm++;
4764 gpuDynInst->wavefront()->outstandingReqs++;
4765 gpuDynInst->wavefront()->validateRequestCounters();
4766 } // execute
4767
4768 void
4769 Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
4770 {
4771 initMemRead<1>(gpuDynInst);
4772 } // initiateAcc
4773
4774 void
4775 Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
4776 {
4777 // 1 request, size 32
4778 ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
4779 sdst.write();
4780 } // completeAcc
4781
4782 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
4783 InFmt_SMEM *iFmt)
4784 : Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
4785 {
4786 setFlag(MemoryRef);
4787 setFlag(Load);
4788 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2
4789
4790 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
4791 {
4792 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2
4793
4794 // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
4795 // the offset input.
4796 void
4797 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
4798 {
4799 Wavefront *wf = gpuDynInst->wavefront();
4800 gpuDynInst->execUnitId = wf->execUnitId;
4801 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4802 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4803 ScalarRegU32 offset(0);
4804 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4805
4806 rsrcDesc.read();
4807
4808 if (instData.IMM) {
4809 offset = extData.OFFSET;
4810 } else {
4811 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4812 off_sgpr.read();
4813 offset = off_sgpr.rawData();
4814 }
4815
4816 calcAddr(gpuDynInst, rsrcDesc, offset);
4817
4818 gpuDynInst->computeUnit()->scalarMemoryPipe
4819 .getGMReqFIFO().push(gpuDynInst);
4820
4821 wf->scalarRdGmReqsInPipe--;
4822 wf->scalarOutstandingReqsRdGm++;
4823 gpuDynInst->wavefront()->outstandingReqs++;
4824 gpuDynInst->wavefront()->validateRequestCounters();
4825 } // execute
4826
4827 void
4828 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
4829 {
4830 initMemRead<2>(gpuDynInst);
4831 } // initiateAcc
4832
4833 void
4834 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
4835 {
4836 // use U64 because 2 requests, each size 32
4837 ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
4838 sdst.write();
4839 } // completeAcc
4840
4841 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
4842 InFmt_SMEM *iFmt)
4843 : Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
4844 {
4845 setFlag(MemoryRef);
4846 setFlag(Load);
4847 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4
4848
4849 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
4850 {
4851 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4
4852
4853 // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
4854 // the offset input.
4855 void
4856 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
4857 {
4858 Wavefront *wf = gpuDynInst->wavefront();
4859 gpuDynInst->execUnitId = wf->execUnitId;
4860 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4861 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4862 ScalarRegU32 offset(0);
4863 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4864
4865 rsrcDesc.read();
4866
4867 if (instData.IMM) {
4868 offset = extData.OFFSET;
4869 } else {
4870 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4871 off_sgpr.read();
4872 offset = off_sgpr.rawData();
4873 }
4874
4875 calcAddr(gpuDynInst, rsrcDesc, offset);
4876
4877 gpuDynInst->computeUnit()->scalarMemoryPipe
4878 .getGMReqFIFO().push(gpuDynInst);
4879
4880 wf->scalarRdGmReqsInPipe--;
4881 wf->scalarOutstandingReqsRdGm++;
4882 gpuDynInst->wavefront()->outstandingReqs++;
4883 gpuDynInst->wavefront()->validateRequestCounters();
4884 } // execute
4885
4886 void
4887 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
4888 {
4889 initMemRead<4>(gpuDynInst);
4890 } // initiateAcc
4891
4892 void
4893 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
4894 {
4895 // 4 requests, each size 32
4896 ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
4897 sdst.write();
4898 } // completeAcc
4899
4900 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
4901 InFmt_SMEM *iFmt)
4902 : Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
4903 {
4904 setFlag(MemoryRef);
4905 setFlag(Load);
4906 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8
4907
4908 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
4909 {
4910 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8
4911
4912 // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
4913 // the offset input.
4914 void
4915 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
4916 {
4917 Wavefront *wf = gpuDynInst->wavefront();
4918 gpuDynInst->execUnitId = wf->execUnitId;
4919 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4920 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4921 ScalarRegU32 offset(0);
4922 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4923
4924 rsrcDesc.read();
4925
4926 if (instData.IMM) {
4927 offset = extData.OFFSET;
4928 } else {
4929 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4930 off_sgpr.read();
4931 offset = off_sgpr.rawData();
4932 }
4933
4934 calcAddr(gpuDynInst, rsrcDesc, offset);
4935
4936 gpuDynInst->computeUnit()->scalarMemoryPipe
4937 .getGMReqFIFO().push(gpuDynInst);
4938
4939 wf->scalarRdGmReqsInPipe--;
4940 wf->scalarOutstandingReqsRdGm++;
4941 gpuDynInst->wavefront()->outstandingReqs++;
4942 gpuDynInst->wavefront()->validateRequestCounters();
4943 } // execute
4944
4945 void
4946 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
4947 {
4948 initMemRead<8>(gpuDynInst);
4949 } // initiateAcc
4950
4951 void
4952 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
4953 {
4954 // 8 requests, each size 32
4955 ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
4956 sdst.write();
4957 } // completeAcc
4958
4959 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
4960 InFmt_SMEM *iFmt)
4961 : Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
4962 {
4963 setFlag(MemoryRef);
4964 setFlag(Load);
4965 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16
4966
4967 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
4968 {
4969 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16
4970
4971 // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
4972 // the offset input.
4973 void
4974 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
4975 {
4976 Wavefront *wf = gpuDynInst->wavefront();
4977 gpuDynInst->execUnitId = wf->execUnitId;
4978 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4979 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4980 ScalarRegU32 offset(0);
4981 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4982
4983 rsrcDesc.read();
4984
4985 if (instData.IMM) {
4986 offset = extData.OFFSET;
4987 } else {
4988 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4989 off_sgpr.read();
4990 offset = off_sgpr.rawData();
4991 }
4992
4993 calcAddr(gpuDynInst, rsrcDesc, offset);
4994
4995 gpuDynInst->computeUnit()->scalarMemoryPipe
4996 .getGMReqFIFO().push(gpuDynInst);
4997
4998 wf->scalarRdGmReqsInPipe--;
4999 wf->scalarOutstandingReqsRdGm++;
5000 gpuDynInst->wavefront()->outstandingReqs++;
5001 gpuDynInst->wavefront()->validateRequestCounters();
5002 } // execute
5003
5004 void
5005 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
5006 {
5007 initMemRead<16>(gpuDynInst);
5008 } // initiateAcc
5009
5010 void
5011 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
5012 {
5013 // 16 requests, each size 32
5014 ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
5015 sdst.write();
5016 } // completeAcc
5017
5018 Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
5019 : Inst_SMEM(iFmt, "s_store_dword")
5020 {
5021 setFlag(MemoryRef);
5022 setFlag(Store);
5023 } // Inst_SMEM__S_STORE_DWORD
5024
5025 Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
5026 {
5027 } // ~Inst_SMEM__S_STORE_DWORD
5028
5029 // Write 1 dword to scalar data cache.
5030 // If the offset is specified as an SGPR, the SGPR contains an unsigned
5031 // BYTE offset (the 2 LSBs are ignored).
5032 // If the offset is specified as an immediate 20-bit constant, the
5033 // constant is an unsigned BYTE offset.
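    // An illustrative encoding (hypothetical assembly): s_store_dword s4,
    // s[0:1], 0x20 writes the dword in s4 to base + 0x20; initiateAcc()
    // below stages that dword in gpuDynInst->scalar_data before the write
    // is issued.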
5034 void
5035 Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
5036 {
5037 Wavefront *wf = gpuDynInst->wavefront();
5038 gpuDynInst->execUnitId = wf->execUnitId;
5039 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5040 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5041 ScalarRegU32 offset(0);
5042 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
5043
5044 addr.read();
5045
5046 if (instData.IMM) {
5047 offset = extData.OFFSET;
5048 } else {
5049 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
5050 off_sgpr.read();
5051 offset = off_sgpr.rawData();
5052 }
5053
5054 calcAddr(gpuDynInst, addr, offset);
5055
5056 gpuDynInst->computeUnit()->scalarMemoryPipe.
5057 getGMReqFIFO().push(gpuDynInst);
5058
5059 wf->scalarWrGmReqsInPipe--;
5060 wf->scalarOutstandingReqsWrGm++;
5061 gpuDynInst->wavefront()->outstandingReqs++;
5062 gpuDynInst->wavefront()->validateRequestCounters();
5063 }
5064
5065 void
5066 Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
5067 {
5068 ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);
5069 sdata.read();
5070 std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
5071 sizeof(ScalarRegU32));
5072 initMemWrite<1>(gpuDynInst);
5073 } // initiateAcc
5074
5075 void
5076 Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
5077 {
5078 } // completeAcc
5079
5080 Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
5081 : Inst_SMEM(iFmt, "s_store_dwordx2")
5082 {
5083 setFlag(MemoryRef);
5084 setFlag(Store);
5085 } // Inst_SMEM__S_STORE_DWORDX2
5086
5087 Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
5088 {
5089 } // ~Inst_SMEM__S_STORE_DWORDX2
5090
5091 // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
5092 // the offset input.
5093 void
5094 Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
5095 {
5096 Wavefront *wf = gpuDynInst->wavefront();
5097 gpuDynInst->execUnitId = wf->execUnitId;
5098 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5099 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5100 ScalarRegU32 offset(0);
5101 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
5102
5103 addr.read();
5104
5105 if (instData.IMM) {
5106 offset = extData.OFFSET;
5107 } else {
5108 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
5109 off_sgpr.read();
5110 offset = off_sgpr.rawData();
5111 }
5112
5113 calcAddr(gpuDynInst, addr, offset);
5114
5115 gpuDynInst->computeUnit()->scalarMemoryPipe.
5116 getGMReqFIFO().push(gpuDynInst);
5117
5118 wf->scalarWrGmReqsInPipe--;
5119 wf->scalarOutstandingReqsWrGm++;
5120 gpuDynInst->wavefront()->outstandingReqs++;
5121 gpuDynInst->wavefront()->validateRequestCounters();
5122 }
5123
5124 void
5125 Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
5126 {
5127 ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);
5128 sdata.read();
5129 std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
5130 sizeof(ScalarRegU64));
5131 initMemWrite<2>(gpuDynInst);
5132 } // initiateAcc
5133
5134 void
5135 Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
5136 {
5137 } // completeAcc
5138
5139 Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
5140 : Inst_SMEM(iFmt, "s_store_dwordx4")
5141 {
5142 setFlag(MemoryRef);
5143 setFlag(Store);
5144 } // Inst_SMEM__S_STORE_DWORDX4
5145
5146 Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
5147 {
5148 } // ~Inst_SMEM__S_STORE_DWORDX4
5149
5150 // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
5151 // the offset input.
5152 void
5153 Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
5154 {
5155 Wavefront *wf = gpuDynInst->wavefront();
5156 gpuDynInst->execUnitId = wf->execUnitId;
5157 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5158 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5159 ScalarRegU32 offset(0);
5160 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
5161
5162 addr.read();
5163
5164 if (instData.IMM) {
5165 offset = extData.OFFSET;
5166 } else {
5167 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
5168 off_sgpr.read();
5169 offset = off_sgpr.rawData();
5170 }
5171
5172 calcAddr(gpuDynInst, addr, offset);
5173
5174 gpuDynInst->computeUnit()->scalarMemoryPipe.
5175 getGMReqFIFO().push(gpuDynInst);
5176
5177 wf->scalarWrGmReqsInPipe--;
5178 wf->scalarOutstandingReqsWrGm++;
5179 gpuDynInst->wavefront()->outstandingReqs++;
5180 gpuDynInst->wavefront()->validateRequestCounters();
5181 }
5182
5183 void
5184 Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
5185 {
5186 ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA);
5187 sdata.read();
5188 std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
5189 4 * sizeof(ScalarRegU32));
5190 initMemWrite<4>(gpuDynInst);
5191 } // initiateAcc
5192
5193 void
5194 Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
5195 {
5196 } // completeAcc
5197
5198 Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD(
5199 InFmt_SMEM *iFmt)
5200 : Inst_SMEM(iFmt, "s_buffer_store_dword")
5201 {
5202 setFlag(MemoryRef);
5203 setFlag(Store);
5204 } // Inst_SMEM__S_BUFFER_STORE_DWORD
5205
5206 Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD()
5207 {
5208 } // ~Inst_SMEM__S_BUFFER_STORE_DWORD
5209
5210 // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the
5211 // offset input.
5212 void
5213 Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
5214 {
5215 panicUnimplemented();
5216 }
5217
5218 void
5219 Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
5220 {
5221 } // initiateAcc
5222
5223 void
5224 Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
5225 {
5226 } // completeAcc
5227
5228 Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
5229 InFmt_SMEM *iFmt)
5230 : Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
5231 {
5232 setFlag(MemoryRef);
5233 setFlag(Store);
5234 } // Inst_SMEM__S_BUFFER_STORE_DWORDX2
5235
5236 Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
5237 {
5238 } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2
5239
5240 // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
5241 // the offset input.
5242 void
5243 Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
5244 {
5245 panicUnimplemented();
5246 }
5247
5248 void
5249 Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
5250 {
5251 } // initiateAcc
5252
5253 void
5254 Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
5255 {
5256 } // completeAcc
5257
5258 Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
5259 InFmt_SMEM *iFmt)
5260 : Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
5261 {
5262 setFlag(MemoryRef);
5263 setFlag(Store);
5264 } // Inst_SMEM__S_BUFFER_STORE_DWORDX4
5265
5266 Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
5267 {
5268 } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4
5269
5270 // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
5271 // the offset input.
5272 void
5273 Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
5274 {
5275 panicUnimplemented();
5276 }
5277
5278 void
5279 Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
5280 {
5281 } // initiateAcc
5282
5283 void
5284 Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
5285 {
5286 } // completeAcc
5287
5288 Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
5289 : Inst_SMEM(iFmt, "s_dcache_inv")
5290 {
5291 } // Inst_SMEM__S_DCACHE_INV
5292
5293 Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
5294 {
5295 } // ~Inst_SMEM__S_DCACHE_INV
5296
5297 // Invalidate the scalar data cache.
5298 void
5299 Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
5300 {
5301 panicUnimplemented();
5302 }
5303
5304 Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
5305 : Inst_SMEM(iFmt, "s_dcache_wb")
5306 {
5307 } // Inst_SMEM__S_DCACHE_WB
5308
5309 Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
5310 {
5311 } // ~Inst_SMEM__S_DCACHE_WB
5312
5313 // Write back dirty data in the scalar data cache.
5314 void
5315 Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
5316 {
5317 panicUnimplemented();
5318 }
5319
5320 Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
5321 : Inst_SMEM(iFmt, "s_dcache_inv_vol")
5322 {
5323 } // Inst_SMEM__S_DCACHE_INV_VOL
5324
5325 Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
5326 {
5327 } // ~Inst_SMEM__S_DCACHE_INV_VOL
5328
5329 // Invalidate the scalar data cache volatile lines.
5330 void
5331 Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
5332 {
5333 panicUnimplemented();
5334 }
5335
5336 Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
5337 : Inst_SMEM(iFmt, "s_dcache_wb_vol")
5338 {
5339 } // Inst_SMEM__S_DCACHE_WB_VOL
5340
5341 Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
5342 {
5343 } // ~Inst_SMEM__S_DCACHE_WB_VOL
5344
5345 // Write back dirty data in the scalar data cache volatile lines.
5346 void
5347 Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
5348 {
5349 panicUnimplemented();
5350 }
5351
5352 Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
5353 : Inst_SMEM(iFmt, "s_memtime")
5354 {
5355 } // Inst_SMEM__S_MEMTIME
5356
5357 Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
5358 {
5359 } // ~Inst_SMEM__S_MEMTIME
5360
5361 // Return current 64-bit timestamp.
5362 void
5363 Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
5364 {
5365 panicUnimplemented();
5366 }
5367
5368 Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
5369 : Inst_SMEM(iFmt, "s_memrealtime")
5370 {
5371 } // Inst_SMEM__S_MEMREALTIME
5372
5373 Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
5374 {
5375 } // ~Inst_SMEM__S_MEMREALTIME
5376
5377 // Return current 64-bit RTC.
5378 void
5379 Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
5380 {
5381 panicUnimplemented();
5382 }
5383
5384 Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
5385 : Inst_SMEM(iFmt, "s_atc_probe")
5386 {
5387 } // Inst_SMEM__S_ATC_PROBE
5388
5389 Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
5390 {
5391 } // ~Inst_SMEM__S_ATC_PROBE
5392
5393 void
5394 Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
5395 {
5396 panicUnimplemented();
5397 }
5398
5399 Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
5400 InFmt_SMEM *iFmt)
5401 : Inst_SMEM(iFmt, "s_atc_probe_buffer")
5402 {
5403 } // Inst_SMEM__S_ATC_PROBE_BUFFER
5404
5405 Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
5406 {
5407 } // ~Inst_SMEM__S_ATC_PROBE_BUFFER
5408
5409 void
5410 Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
5411 {
5412 panicUnimplemented();
5413 }
5414
5415 Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
5416 : Inst_VOP2(iFmt, "v_cndmask_b32")
5417 {
5418 setFlag(ALU);
5419 setFlag(ReadsVCC);
5420 } // Inst_VOP2__V_CNDMASK_B32
5421
5422 Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
5423 {
5424 } // ~Inst_VOP2__V_CNDMASK_B32
5425
5426 // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
5427 // as a scalar GPR in S2.
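    // A worked example (illustrative values): with VCC = 0x5 only lanes 0
    // and 2 have their mask bit set, so those lanes receive S1 while every
    // other active lane receives S0 -- VCC acts as a per-lane select, not a
    // single boolean.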
5428 void
5429 Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
5430 {
5431 Wavefront *wf = gpuDynInst->wavefront();
5432 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
5433 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
5434 VecOperandU32 vdst(gpuDynInst, instData.VDST);
5435 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
5436
5437 src0.readSrc();
5438 src1.read();
5439 vcc.read();
5440
5441 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5442 if (wf->execMask(lane)) {
5443 vdst[lane]
5444 = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
5445 }
5446 }
5447
5448 vdst.write();
5449 }
5450
5451 Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
5452 : Inst_VOP2(iFmt, "v_add_f32")
5453 {
5454 setFlag(ALU);
5455 setFlag(F32);
5456 } // Inst_VOP2__V_ADD_F32
5457
5458 Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
5459 {
5460 } // ~Inst_VOP2__V_ADD_F32
5461
5462 // D.f = S0.f + S1.f.
5463 void
5464 Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
5465 {
5466 Wavefront *wf = gpuDynInst->wavefront();
5467 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5468 VecOperandF32 src1(gpuDynInst, instData.VSRC1);
5469 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5470
5471 src0.readSrc();
5472 src1.read();
5473
5474 if (isDPPInst()) {
5475 VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
5476 src0_dpp.read();
5477
5478 DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
5479                    "DPP_CTRL: %#x, SRC0_ABS: %d, SRC0_NEG: %d, "
5480 "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
5481 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
5482 extData.iFmt_VOP_DPP.DPP_CTRL,
5483 extData.iFmt_VOP_DPP.SRC0_ABS,
5484 extData.iFmt_VOP_DPP.SRC0_NEG,
5485 extData.iFmt_VOP_DPP.SRC1_ABS,
5486 extData.iFmt_VOP_DPP.SRC1_NEG,
5487 extData.iFmt_VOP_DPP.BOUND_CTRL,
5488 extData.iFmt_VOP_DPP.BANK_MASK,
5489 extData.iFmt_VOP_DPP.ROW_MASK);
5490
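            // processDPP() (inst_util.hh) applies the cross-lane selection
            // encoded in DPP_CTRL, along with the ABS/NEG and bound-control
            // modifiers, to the operands in place; the lane loop below then
            // consumes the permuted sources like an ordinary VOP2 add.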
5491 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
5492
5493 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5494 if (wf->execMask(lane)) {
5495 vdst[lane] = src0_dpp[lane] + src1[lane];
5496 }
5497 }
5498 } else {
5499 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5500 if (wf->execMask(lane)) {
5501 vdst[lane] = src0[lane] + src1[lane];
5502 }
5503 }
5504 }
5505
5506 vdst.write();
5507 }
5508
5509 Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
5510 : Inst_VOP2(iFmt, "v_sub_f32")
5511 {
5512 setFlag(ALU);
5513 setFlag(F32);
5514 } // Inst_VOP2__V_SUB_F32
5515
5516 Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
5517 {
5518 } // ~Inst_VOP2__V_SUB_F32
5519
5520 // D.f = S0.f - S1.f.
5521 void
5522 Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
5523 {
5524 Wavefront *wf = gpuDynInst->wavefront();
5525 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5526 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5527 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5528
5529 src0.readSrc();
5530 src1.read();
5531
5532 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5533 if (wf->execMask(lane)) {
5534 vdst[lane] = src0[lane] - src1[lane];
5535 }
5536 }
5537
5538 vdst.write();
5539 }
5540
5541 Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
5542 : Inst_VOP2(iFmt, "v_subrev_f32")
5543 {
5544 setFlag(ALU);
5545 setFlag(F32);
5546 } // Inst_VOP2__V_SUBREV_F32
5547
5548 Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
5549 {
5550 } // ~Inst_VOP2__V_SUBREV_F32
5551
5552 // D.f = S1.f - S0.f.
5553 void
5554 Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
5555 {
5556 Wavefront *wf = gpuDynInst->wavefront();
5557 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5558 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5559 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5560
5561 src0.readSrc();
5562 src1.read();
5563
5564 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5565 if (wf->execMask(lane)) {
5566 vdst[lane] = src1[lane] - src0[lane];
5567 }
5568 }
5569
5570 vdst.write();
5571 }
5572
5573 Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
5574 : Inst_VOP2(iFmt, "v_mul_legacy_f32")
5575 {
5576 setFlag(ALU);
5577 setFlag(F32);
5578 } // Inst_VOP2__V_MUL_LEGACY_F32
5579
5580 Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
5581 {
5582 } // ~Inst_VOP2__V_MUL_LEGACY_F32
5583
5584 // D.f = S0.f * S1.f
5585 void
5586 Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
5587 {
5588 Wavefront *wf = gpuDynInst->wavefront();
5589 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5590 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5591 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5592
5593 src0.readSrc();
5594 src1.read();
5595
5596 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5597 if (wf->execMask(lane)) {
5598 vdst[lane] = src0[lane] * src1[lane];
5599 }
5600 }
5601
5602 vdst.write();
5603 }
5604
5605 Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
5606 : Inst_VOP2(iFmt, "v_mul_f32")
5607 {
5608 setFlag(ALU);
5609 setFlag(F32);
5610 } // Inst_VOP2__V_MUL_F32
5611
5612 Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
5613 {
5614 } // ~Inst_VOP2__V_MUL_F32
5615
5616 // D.f = S0.f * S1.f.
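    // The loop below resolves the IEEE-754 edge cases by hand rather than
    // relying on host floating point: NaN inputs propagate, (+/-0) * (+/-inf)
    // produces NaN, a zero or subnormal (flushed) src0 times a finite src1
    // yields a correctly signed zero, and an infinite src0 times a finite
    // nonzero src1 yields a correctly signed infinity. E.g. (+0.0f) * (-5.0f)
    // = -0.0f and (-inf) * (-2.0f) = +inf (illustrative values).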
5617 void
5618 Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
5619 {
5620 Wavefront *wf = gpuDynInst->wavefront();
5621 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5622 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5623 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5624
5625 src0.readSrc();
5626 src1.read();
5627
5628 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5629 if (wf->execMask(lane)) {
5630 if (std::isnan(src0[lane]) ||
5631 std::isnan(src1[lane])) {
5632 vdst[lane] = NAN;
5633 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
5634 std::fpclassify(src0[lane]) == FP_ZERO) &&
5635 !std::signbit(src0[lane])) {
5636 if (std::isinf(src1[lane])) {
5637 vdst[lane] = NAN;
5638 } else if (!std::signbit(src1[lane])) {
5639 vdst[lane] = +0.0;
5640 } else {
5641 vdst[lane] = -0.0;
5642 }
5643 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
5644 std::fpclassify(src0[lane]) == FP_ZERO) &&
5645 std::signbit(src0[lane])) {
5646 if (std::isinf(src1[lane])) {
5647 vdst[lane] = NAN;
5648 } else if (std::signbit(src1[lane])) {
5649 vdst[lane] = +0.0;
5650 } else {
5651 vdst[lane] = -0.0;
5652 }
5653 } else if (std::isinf(src0[lane]) &&
5654 !std::signbit(src0[lane])) {
5655 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
5656 std::fpclassify(src1[lane]) == FP_ZERO) {
5657 vdst[lane] = NAN;
5658 } else if (!std::signbit(src1[lane])) {
5659 vdst[lane] = +INFINITY;
5660 } else {
5661 vdst[lane] = -INFINITY;
5662 }
5663 } else if (std::isinf(src0[lane]) &&
5664 std::signbit(src0[lane])) {
5665 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
5666 std::fpclassify(src1[lane]) == FP_ZERO) {
5667 vdst[lane] = NAN;
5668 } else if (std::signbit(src1[lane])) {
5669 vdst[lane] = +INFINITY;
5670 } else {
5671 vdst[lane] = -INFINITY;
5672 }
5673 } else {
5674 vdst[lane] = src0[lane] * src1[lane];
5675 }
5676 }
5677 }
5678
5679 vdst.write();
5680 }
5681
5682 Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
5683 : Inst_VOP2(iFmt, "v_mul_i32_i24")
5684 {
5685 setFlag(ALU);
5686 } // Inst_VOP2__V_MUL_I32_I24
5687
5688 Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
5689 {
5690 } // ~Inst_VOP2__V_MUL_I32_I24
5691
5692 // D.i = S0.i[23:0] * S1.i[23:0].
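    // A worked example (illustrative values): the low 24 bits are
    // sign-extended before the multiply, so src0 = 0x00FFFFFF is treated as
    // -1 and multiplying by src1 = 2 yields -2 (0xFFFFFFFE), not 0x01FFFFFE.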
5693 void
5694 Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
5695 {
5696 Wavefront *wf = gpuDynInst->wavefront();
5697 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5698 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5699 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5700
5701 src0.readSrc();
5702 src1.read();
5703
5704 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5705 if (wf->execMask(lane)) {
5706 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
5707 * sext<24>(bits(src1[lane], 23, 0));
5708 }
5709 }
5710
5711 vdst.write();
5712 }
5713
5714 Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
5715 : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
5716 {
5717 setFlag(ALU);
5718 } // Inst_VOP2__V_MUL_HI_I32_I24
5719
5720 Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
5721 {
5722 } // ~Inst_VOP2__V_MUL_HI_I32_I24
5723
5724 // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
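    // A worked example (illustrative values): the full product of two
    // signed 24-bit values fits in 48 bits, so D.i receives bits 47:32.
    // With both sources at the 24-bit maximum 0x7FFFFF (8388607) the product
    // is 0x3FFFFF000001 and vdst = 0x3FFF.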
5725 void
5726 Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
5727 {
5728 Wavefront *wf = gpuDynInst->wavefront();
5729 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5730 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5731 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5732
5733 src0.readSrc();
5734 src1.read();
5735
5736 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5737 if (wf->execMask(lane)) {
5738 VecElemI64 tmp_src0
5739 = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
5740 VecElemI64 tmp_src1
5741 = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
5742
5743 vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
5744 }
5745 }
5746
5747 vdst.write();
5748 }
5749
5750 Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
5751 : Inst_VOP2(iFmt, "v_mul_u32_u24")
5752 {
5753 setFlag(ALU);
5754 } // Inst_VOP2__V_MUL_U32_U24
5755
5756 Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
5757 {
5758 } // ~Inst_VOP2__V_MUL_U32_U24
5759
5760 // D.u = S0.u[23:0] * S1.u[23:0].
5761 void
5762 Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
5763 {
5764 Wavefront *wf = gpuDynInst->wavefront();
5765 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
5766 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
5767 VecOperandU32 vdst(gpuDynInst, instData.VDST);
5768
5769 src0.readSrc();
5770 src1.read();
5771
5772 if (isSDWAInst()) {
5773 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
5774             // use copies of the original src0, src1, and dest during selection
5775 VecOperandU32 origSrc0_sdwa(gpuDynInst,
5776 extData.iFmt_VOP_SDWA.SRC0);
5777 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
5778 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
5779
5780 src0_sdwa.read();
5781 origSrc0_sdwa.read();
5782 origSrc1.read();
5783
5784 DPRINTF(GCN3, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register "
5785 "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
5786 "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
5787 "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
5788 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
5789 extData.iFmt_VOP_SDWA.DST_UNUSED,
5790 extData.iFmt_VOP_SDWA.CLAMP,
5791 extData.iFmt_VOP_SDWA.SRC0_SEL,
5792 extData.iFmt_VOP_SDWA.SRC0_SEXT,
5793 extData.iFmt_VOP_SDWA.SRC0_NEG,
5794 extData.iFmt_VOP_SDWA.SRC0_ABS,
5795 extData.iFmt_VOP_SDWA.SRC1_SEL,
5796 extData.iFmt_VOP_SDWA.SRC1_SEXT,
5797 extData.iFmt_VOP_SDWA.SRC1_NEG,
5798 extData.iFmt_VOP_SDWA.SRC1_ABS);
5799
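            // processSDWA_src() (inst_util.hh) applies the sub-dword
            // byte/word selects to the source operands, using the orig*
            // copies to preserve the unselected bits; processSDWA_dst()
            // below merges the result back into the destination according
            // to DST_SEL.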
5800 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
5801 src1, origSrc1);
5802
5803 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5804 if (wf->execMask(lane)) {
5805 vdst[lane] = bits(src0_sdwa[lane], 23, 0) *
5806 bits(src1[lane], 23, 0);
5807 origVdst[lane] = vdst[lane]; // keep copy consistent
5808 }
5809 }
5810
5811 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
5812 } else {
5813 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5814 if (wf->execMask(lane)) {
5815 vdst[lane] = bits(src0[lane], 23, 0) *
5816 bits(src1[lane], 23, 0);
5817 }
5818 }
5819 }
5820
5821
5822 vdst.write();
5823 }
5824
5825 Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
5826 : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
5827 {
5828 setFlag(ALU);
5829 } // Inst_VOP2__V_MUL_HI_U32_U24
5830
5831 Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
5832 {
5833 } // ~Inst_VOP2__V_MUL_HI_U32_U24
5834
5835 // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
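    // The unsigned counterpart of v_mul_hi_i32_i24 above (illustrative
    // values): with both sources at the 24-bit maximum 0xFFFFFF the product
    // is 0xFFFFFE000001, so vdst = 0xFFFF.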
5836 void
5837 Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
5838 {
5839 Wavefront *wf = gpuDynInst->wavefront();
5840 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
5841 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
5842 VecOperandU32 vdst(gpuDynInst, instData.VDST);
5843
5844 src0.readSrc();
5845 src1.read();
5846
5847 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5848 if (wf->execMask(lane)) {
5849 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
5850 VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
5851 vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
5852 }
5853 }
5854
5855 vdst.write();
5856 }
5857
5858 Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
5859 : Inst_VOP2(iFmt, "v_min_f32")
5860 {
5861 setFlag(ALU);
5862 setFlag(F32);
5863 } // Inst_VOP2__V_MIN_F32
5864
5865 Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
5866 {
5867 } // ~Inst_VOP2__V_MIN_F32
5868
5869 // D.f = (S0.f < S1.f ? S0.f : S1.f).
5870 void
5871 Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
5872 {
5873 Wavefront *wf = gpuDynInst->wavefront();
5874 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5875 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5876 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5877
5878 src0.readSrc();
5879 src1.read();
5880
5881 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5882 if (wf->execMask(lane)) {
5883 vdst[lane] = std::fmin(src0[lane], src1[lane]);
5884 }
5885 }
5886
5887 vdst.write();
5888 }
5889
5890 Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
5891 : Inst_VOP2(iFmt, "v_max_f32")
5892 {
5893 setFlag(ALU);
5894 setFlag(F32);
5895 } // Inst_VOP2__V_MAX_F32
5896
5897 Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
5898 {
5899 } // ~Inst_VOP2__V_MAX_F32
5900
5901 // D.f = (S0.f >= S1.f ? S0.f : S1.f).
5902 void
5903 Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
5904 {
5905 Wavefront *wf = gpuDynInst->wavefront();
5906 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5907 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5908 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5909
5910 src0.readSrc();
5911 src1.read();
5912
5913 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5914 if (wf->execMask(lane)) {
5915 vdst[lane] = std::fmax(src0[lane], src1[lane]);
5916 }
5917 }
5918
5919 vdst.write();
5920 }
5921
5922 Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
5923 : Inst_VOP2(iFmt, "v_min_i32")
5924 {
5925 setFlag(ALU);
5926 } // Inst_VOP2__V_MIN_I32
5927
5928 Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
5929 {
5930 } // ~Inst_VOP2__V_MIN_I32
5931
5932 // D.i = min(S0.i, S1.i).
5933 void
5934 Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
5935 {
5936 Wavefront *wf = gpuDynInst->wavefront();
5937 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5938 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5939 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5940
5941 src0.readSrc();
5942 src1.read();
5943
5944 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5945 if (wf->execMask(lane)) {
5946 vdst[lane] = std::min(src0[lane], src1[lane]);
5947 }
5948 }
5949
5950 vdst.write();
5951 }
5952
5953 Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
5954 : Inst_VOP2(iFmt, "v_max_i32")
5955 {
5956 setFlag(ALU);
5957 } // Inst_VOP2__V_MAX_I32
5958
5959 Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
5960 {
5961 } // ~Inst_VOP2__V_MAX_I32
5962
5963 // D.i = max(S0.i, S1.i).
5964 void
5965 Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
5966 {
5967 Wavefront *wf = gpuDynInst->wavefront();
5968 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5969 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5970 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5971
5972 src0.readSrc();
5973 src1.read();
5974
5975 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5976 if (wf->execMask(lane)) {
5977 vdst[lane] = std::max(src0[lane], src1[lane]);
5978 }
5979 }
5980
5981 vdst.write();
5982 }
5983
5984 Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
5985 : Inst_VOP2(iFmt, "v_min_u32")
5986 {
5987 setFlag(ALU);
5988 } // Inst_VOP2__V_MIN_U32
5989
5990 Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
5991 {
5992 } // ~Inst_VOP2__V_MIN_U32
5993
5994 // D.u = min(S0.u, S1.u).
5995 void
5996 Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
5997 {
5998 Wavefront *wf = gpuDynInst->wavefront();
5999 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6000 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6001 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6002
6003 src0.readSrc();
6004 src1.read();
6005
6006 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6007 if (wf->execMask(lane)) {
6008 vdst[lane] = std::min(src0[lane], src1[lane]);
6009 }
6010 }
6011
6012 vdst.write();
6013 }
6014
6015 Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
6016 : Inst_VOP2(iFmt, "v_max_u32")
6017 {
6018 setFlag(ALU);
6019 } // Inst_VOP2__V_MAX_U32
6020
6021 Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
6022 {
6023 } // ~Inst_VOP2__V_MAX_U32
6024
6025 // D.u = max(S0.u, S1.u).
6026 void
6027 Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
6028 {
6029 Wavefront *wf = gpuDynInst->wavefront();
6030 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6031 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6032 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6033
6034 src0.readSrc();
6035 src1.read();
6036
6037 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6038 if (wf->execMask(lane)) {
6039 vdst[lane] = std::max(src0[lane], src1[lane]);
6040 }
6041 }
6042
6043 vdst.write();
6044 }
6045
6046 Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
6047 : Inst_VOP2(iFmt, "v_lshrrev_b32")
6048 {
6049 setFlag(ALU);
6050 } // Inst_VOP2__V_LSHRREV_B32
6051
6052 Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
6053 {
6054 } // ~Inst_VOP2__V_LSHRREV_B32
6055
6056 // D.u = S1.u >> S0.u[4:0].
6057 // The vacated bits are set to zero.
6058 void
6059 Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
6060 {
6061 Wavefront *wf = gpuDynInst->wavefront();
6062 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6063 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6064 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6065
6066 src0.readSrc();
6067 src1.read();
6068
6069 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6070 if (wf->execMask(lane)) {
6071 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
6072 }
6073 }
6074
6075 vdst.write();
6076 }
6077
6078 Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
6079 : Inst_VOP2(iFmt, "v_ashrrev_i32")
6080 {
6081 setFlag(ALU);
6082 } // Inst_VOP2__V_ASHRREV_I32
6083
6084 Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
6085 {
6086 } // ~Inst_VOP2__V_ASHRREV_I32
6087
6088 // D.i = signext(S1.i) >> S0.i[4:0].
6089 // The vacated bits are set to the sign bit of the input value.
6090 void
6091 Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
6092 {
6093 Wavefront *wf = gpuDynInst->wavefront();
6094 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6095 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
6096 VecOperandI32 vdst(gpuDynInst, instData.VDST);
6097
6098 src0.readSrc();
6099 src1.read();
6100
6101 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6102 if (wf->execMask(lane)) {
6103 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
6104 }
6105 }
6106
6107 vdst.write();
6108 }
6109
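// Editorial sketch: the operand types are what distinguish this shift
// from v_lshrrev_b32. With the same bit pattern, the unsigned element
// there shifts in zeros, while the signed element here shifts in copies
// of the sign bit (an arithmetic shift on the two's-complement targets
// gem5 runs on):
static inline VecElemI32
ashrSketch(VecElemI32 s1, VecElemU32 s0)
{
    return s1 >> bits(s0, 4, 0); // e.g. ashrSketch(-16, 2) == -4
}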
6110 Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
6111 : Inst_VOP2(iFmt, "v_lshlrev_b32")
6112 {
6113 setFlag(ALU);
6114 } // Inst_VOP2__V_LSHLREV_B32
6115
6116 Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
6117 {
6118 } // ~Inst_VOP2__V_LSHLREV_B32
6119
6120 // D.u = S1.u << S0.u[4:0].
6121 void
6122 Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
6123 {
6124 Wavefront *wf = gpuDynInst->wavefront();
6125 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6126 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
6127 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6128
6129 src0.readSrc();
6130 src1.read();
6131
6132 if (isSDWAInst()) {
6133 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
6134 // use copies of original src0, src1, and vdst during selecting
6135 VecOperandU32 origSrc0_sdwa(gpuDynInst,
6136 extData.iFmt_VOP_SDWA.SRC0);
6137 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
6138 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
6139
6140 src0_sdwa.read();
6141 origSrc0_sdwa.read();
6142 origSrc1.read();
6143
6144 DPRINTF(GCN3, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
6145 "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
6146 "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
6147 "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
6148 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
6149 extData.iFmt_VOP_SDWA.DST_UNUSED,
6150 extData.iFmt_VOP_SDWA.CLAMP,
6151 extData.iFmt_VOP_SDWA.SRC0_SEL,
6152 extData.iFmt_VOP_SDWA.SRC0_SEXT,
6153 extData.iFmt_VOP_SDWA.SRC0_NEG,
6154 extData.iFmt_VOP_SDWA.SRC0_ABS,
6155 extData.iFmt_VOP_SDWA.SRC1_SEL,
6156 extData.iFmt_VOP_SDWA.SRC1_SEXT,
6157 extData.iFmt_VOP_SDWA.SRC1_NEG,
6158 extData.iFmt_VOP_SDWA.SRC1_ABS);
6159
6160 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
6161 src1, origSrc1);
6162
6163 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6164 if (wf->execMask(lane)) {
6165 vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
6166 origVdst[lane] = vdst[lane]; // keep copy consistent
6167 }
6168 }
6169
6170 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
6171 } else {
6172 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6173 if (wf->execMask(lane)) {
6174 vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
6175 }
6176 }
6177 }
6178
6179 vdst.write();
6180 }
6181
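// Hedged sketch of the sub-dword selection that processSDWA_src applies
// before the shift above (the real logic lives in inst_util.hh and also
// handles word selects, abs/neg, and the unused-bits policy). byteIdx
// here is an illustrative byte index, not the actual SDWA SEL encoding:
static inline VecElemU32
sdwaByteSelSketch(VecElemU32 operand, int byteIdx, bool signExtend)
{
    VecElemU32 byte = bits(operand, 8 * byteIdx + 7, 8 * byteIdx);
    return signExtend ? (VecElemU32)sext<8>(byte) : byte;
}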
6182 Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
6183 : Inst_VOP2(iFmt, "v_and_b32")
6184 {
6185 setFlag(ALU);
6186 } // Inst_VOP2__V_AND_B32
6187
6188 Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
6189 {
6190 } // ~Inst_VOP2__V_AND_B32
6191
6192 // D.u = S0.u & S1.u.
6193 // Input and output modifiers not supported.
6194 void
6195 Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
6196 {
6197 Wavefront *wf = gpuDynInst->wavefront();
6198 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6199 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6200 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6201
6202 src0.readSrc();
6203 src1.read();
6204
6205 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6206 if (wf->execMask(lane)) {
6207 vdst[lane] = src0[lane] & src1[lane];
6208 }
6209 }
6210
6211 vdst.write();
6212 }
6213
6214 Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
6215 : Inst_VOP2(iFmt, "v_or_b32")
6216 {
6217 setFlag(ALU);
6218 } // Inst_VOP2__V_OR_B32
6219
6220 Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
6221 {
6222 } // ~Inst_VOP2__V_OR_B32
6223
6224 // D.u = S0.u | S1.u.
6225 // Input and output modifiers not supported.
6226 void
6227 Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
6228 {
6229 Wavefront *wf = gpuDynInst->wavefront();
6230 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6231 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
6232 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6233
6234 src0.readSrc();
6235 src1.read();
6236
6237 if (isSDWAInst()) {
6238 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
6239 // use copies of original src0, src1, and dest during selecting
6240 VecOperandU32 origSrc0_sdwa(gpuDynInst,
6241 extData.iFmt_VOP_SDWA.SRC0);
6242 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
6243 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
6244
6245 src0_sdwa.read();
6246 origSrc0_sdwa.read();
6247 origSrc1.read();
6248
6249 DPRINTF(GCN3, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
6250 "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
6251 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
6252 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
6253 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
6254 extData.iFmt_VOP_SDWA.DST_UNUSED,
6255 extData.iFmt_VOP_SDWA.CLAMP,
6256 extData.iFmt_VOP_SDWA.SRC0_SEL,
6257 extData.iFmt_VOP_SDWA.SRC0_SEXT,
6258 extData.iFmt_VOP_SDWA.SRC0_NEG,
6259 extData.iFmt_VOP_SDWA.SRC0_ABS,
6260 extData.iFmt_VOP_SDWA.SRC1_SEL,
6261 extData.iFmt_VOP_SDWA.SRC1_SEXT,
6262 extData.iFmt_VOP_SDWA.SRC1_NEG,
6263 extData.iFmt_VOP_SDWA.SRC1_ABS);
6264
6265 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
6266 src1, origSrc1);
6267
6268 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6269 if (wf->execMask(lane)) {
6270 vdst[lane] = src0_sdwa[lane] | src1[lane];
6271 origVdst[lane] = vdst[lane]; // keep copy consistent
6272 }
6273 }
6274
6275 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
6276 } else {
6277 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6278 if (wf->execMask(lane)) {
6279 vdst[lane] = src0[lane] | src1[lane];
6280 }
6281 }
6282 }
6283
6284 vdst.write();
6285 }
6286
6287 Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
6288 : Inst_VOP2(iFmt, "v_xor_b32")
6289 {
6290 setFlag(ALU);
6291 } // Inst_VOP2__V_XOR_B32
6292
6293 Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
6294 {
6295 } // ~Inst_VOP2__V_XOR_B32
6296
6297 // D.u = S0.u ^ S1.u.
6298 // Input and output modifiers not supported.
6299 void
6300 Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
6301 {
6302 Wavefront *wf = gpuDynInst->wavefront();
6303 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6304 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6305 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6306
6307 src0.readSrc();
6308 src1.read();
6309
6310 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6311 if (wf->execMask(lane)) {
6312 vdst[lane] = src0[lane] ^ src1[lane];
6313 }
6314 }
6315
6316 vdst.write();
6317 }
6318
6319 Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
6320 : Inst_VOP2(iFmt, "v_mac_f32")
6321 {
6322 setFlag(ALU);
6323 setFlag(F32);
6324 setFlag(MAC);
6325 } // Inst_VOP2__V_MAC_F32
6326
6327 Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
6328 {
6329 } // ~Inst_VOP2__V_MAC_F32
6330
6331 // D.f = S0.f * S1.f + D.f.
6332 void
6333 Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
6334 {
6335 Wavefront *wf = gpuDynInst->wavefront();
6336 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
6337 VecOperandF32 src1(gpuDynInst, instData.VSRC1);
6338 VecOperandF32 vdst(gpuDynInst, instData.VDST);
6339
6340 src0.readSrc();
6341 src1.read();
6342 vdst.read();
6343
6344 if (isDPPInst()) {
6345 VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
6346 src0_dpp.read();
6347
6348 DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
6349 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
6350 "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
6351 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
6352 extData.iFmt_VOP_DPP.DPP_CTRL,
6353 extData.iFmt_VOP_DPP.SRC0_ABS,
6354 extData.iFmt_VOP_DPP.SRC0_NEG,
6355 extData.iFmt_VOP_DPP.SRC1_ABS,
6356 extData.iFmt_VOP_DPP.SRC1_NEG,
6357 extData.iFmt_VOP_DPP.BOUND_CTRL,
6358 extData.iFmt_VOP_DPP.BANK_MASK,
6359 extData.iFmt_VOP_DPP.ROW_MASK);
6360
6361 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
6362
6363 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6364 if (wf->execMask(lane)) {
6365 vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
6366 vdst[lane]);
6367 }
6368 }
6369 } else {
6370 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6371 if (wf->execMask(lane)) {
6372 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
6373 }
6374 }
6375 }
6376
6377 vdst.write();
6378 }
6379
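// The std::fma above matters numerically: the multiply and add are
// rounded once rather than twice. A sketch of how the fused and
// unfused forms can differ for float inputs:
static inline bool
fmaDiffersSketch(VecElemF32 a, VecElemF32 b, VecElemF32 c)
{
    // true whenever rounding the intermediate product changes the sum
    return std::fma(a, b, c) != (a * b + c);
}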
6380 Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
6381 : Inst_VOP2(iFmt, "v_madmk_f32")
6382 {
6383 setFlag(ALU);
6384 setFlag(F32);
6385 setFlag(MAD);
6386 } // Inst_VOP2__V_MADMK_F32
6387
6388 Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
6389 {
6390 } // ~Inst_VOP2__V_MADMK_F32
6391
6392 // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
6393 // This opcode cannot use the input/output modifiers.
6394 void
6395 Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
6396 {
6397 Wavefront *wf = gpuDynInst->wavefront();
6398 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
6399 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
6400 VecOperandF32 vdst(gpuDynInst, instData.VDST);
6401 VecElemF32 k = extData.imm_f32;
6402
6403 src0.readSrc();
6404 src1.read();
6405
6406 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6407 if (wf->execMask(lane)) {
6408 vdst[lane] = std::fma(src0[lane], k, src1[lane]);
6409 }
6410 }
6411
6412 vdst.write();
6413 }
6414
6415 Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
6416 : Inst_VOP2(iFmt, "v_madak_f32")
6417 {
6418 setFlag(ALU);
6419 setFlag(F32);
6420 setFlag(MAD);
6421 } // Inst_VOP2__V_MADAK_F32
6422
6423 Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
6424 {
6425 } // ~Inst_VOP2__V_MADAK_F32
6426
6427 // D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
6428 // This opcode cannot use input/output modifiers.
6429 void
6430 Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
6431 {
6432 Wavefront *wf = gpuDynInst->wavefront();
6433 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
6434 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
6435 VecOperandF32 vdst(gpuDynInst, instData.VDST);
6436 VecElemF32 k = extData.imm_f32;
6437
6438 src0.readSrc();
6439 src1.read();
6440
6441 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6442 if (wf->execMask(lane)) {
6443 vdst[lane] = std::fma(src0[lane], src1[lane], k);
6444 }
6445 }
6446
6447 vdst.write();
6448 }
6449
6450 Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
6451 : Inst_VOP2(iFmt, "v_add_u32")
6452 {
6453 setFlag(ALU);
6454 setFlag(WritesVCC);
6455 } // Inst_VOP2__V_ADD_U32
6456
6457 Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
6458 {
6459 } // ~Inst_VOP2__V_ADD_U32
6460
6461 // D.u = S0.u + S1.u;
6462 // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
6463 // overflow or carry-out.
6464 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
6465 void
6466 Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
6467 {
6468 Wavefront *wf = gpuDynInst->wavefront();
6469 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6470 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
6471 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6472 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6473
6474 src0.readSrc();
6475 src1.read();
6476
6477 if (isSDWAInst()) {
6478 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
6479 // use copies of original src0, src1, and dest during selecting
6480 VecOperandU32 origSrc0_sdwa(gpuDynInst,
6481 extData.iFmt_VOP_SDWA.SRC0);
6482 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
6483 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
6484
6485 src0_sdwa.read();
6486 origSrc0_sdwa.read();
6487 origSrc1.read();
6488
6489 DPRINTF(GCN3, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
6490 "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
6491 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
6492 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
6493 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
6494 extData.iFmt_VOP_SDWA.DST_UNUSED,
6495 extData.iFmt_VOP_SDWA.CLAMP,
6496 extData.iFmt_VOP_SDWA.SRC0_SEL,
6497 extData.iFmt_VOP_SDWA.SRC0_SEXT,
6498 extData.iFmt_VOP_SDWA.SRC0_NEG,
6499 extData.iFmt_VOP_SDWA.SRC0_ABS,
6500 extData.iFmt_VOP_SDWA.SRC1_SEL,
6501 extData.iFmt_VOP_SDWA.SRC1_SEXT,
6502 extData.iFmt_VOP_SDWA.SRC1_NEG,
6503 extData.iFmt_VOP_SDWA.SRC1_ABS);
6504
6505 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
6506 src1, origSrc1);
6507
6508 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6509 if (wf->execMask(lane)) {
6510 vdst[lane] = src0_sdwa[lane] + src1[lane];
6511 origVdst[lane] = vdst[lane]; // keep copy consistent
6512 vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
6513 + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
6514 }
6515 }
6516
6517 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
6518 } else {
6519 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6520 if (wf->execMask(lane)) {
6521 vdst[lane] = src0[lane] + src1[lane];
6522 vcc.setBit(lane, ((VecElemU64)src0[lane]
6523 + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
6524 }
6525 }
6526 }
6527
6528 vcc.write();
6529 vdst.write();
6530 }
6531
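// Sketch of the per-lane carry-out computed above: widening both
// addends to 64 bits makes the carry exactly the condition that the
// true sum reaches 2^32:
static inline bool
addCarryOutSketch(VecElemU32 a, VecElemU32 b)
{
    return ((VecElemU64)a + (VecElemU64)b) >= 0x100000000ULL;
}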
6532 Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
6533 : Inst_VOP2(iFmt, "v_sub_u32")
6534 {
6535 setFlag(ALU);
6536 setFlag(WritesVCC);
6537 } // Inst_VOP2__V_SUB_U32
6538
6539 Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
6540 {
6541 } // ~Inst_VOP2__V_SUB_U32
6542
6543 // D.u = S0.u - S1.u;
6544 // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
6545 // carry-out.
6546 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
6547 void
6548 Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
6549 {
6550 Wavefront *wf = gpuDynInst->wavefront();
6551 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6552 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6553 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6554 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6555
6556 src0.readSrc();
6557 src1.read();
6558
6559 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6560 if (wf->execMask(lane)) {
6561 vdst[lane] = src0[lane] - src1[lane];
6562 vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
6563 }
6564 }
6565
6566 vdst.write();
6567 vcc.write();
6568 }
6569
6570 Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
6571 : Inst_VOP2(iFmt, "v_subrev_u32")
6572 {
6573 setFlag(ALU);
6574 setFlag(WritesVCC);
6575 } // Inst_VOP2__V_SUBREV_U32
6576
6577 Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
6578 {
6579 } // ~Inst_VOP2__V_SUBREV_U32
6580
6581 // D.u = S1.u - S0.u;
6582 // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
6583 // carry-out.
6584 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
6585 void
6586 Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
6587 {
6588 Wavefront *wf = gpuDynInst->wavefront();
6589 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6590 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6591 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6592 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6593
6594 src0.readSrc();
6595 src1.read();
6596
6597 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6598 if (wf->execMask(lane)) {
6599 vdst[lane] = src1[lane] - src0[lane];
6600 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
6601 }
6602 }
6603
6604 vdst.write();
6605 vcc.write();
6606 }
6607
6608 Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt)
6609 : Inst_VOP2(iFmt, "v_addc_u32")
6610 {
6611 setFlag(ALU);
6612 setFlag(WritesVCC);
6613 setFlag(ReadsVCC);
6614 } // Inst_VOP2__V_ADDC_U32
6615
6616 Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32()
6617 {
6618 } // ~Inst_VOP2__V_ADDC_U32
6619
6620 // D.u = S0.u + S1.u + VCC[threadId];
6621 // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
6622 // is an UNSIGNED overflow.
6623 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
6624 // source comes from the SGPR-pair at S2.u.
6625 void
6626 Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
6627 {
6628 Wavefront *wf = gpuDynInst->wavefront();
6629 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6630 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6631 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6632 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6633
6634 src0.readSrc();
6635 src1.read();
6636 vcc.read();
6637
6638 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6639 if (wf->execMask(lane)) {
6640 vdst[lane] = src0[lane] + src1[lane]
6641 + bits(vcc.rawData(), lane);
6642 vcc.setBit(lane, ((VecElemU64)src0[lane]
6643 + (VecElemU64)src1[lane]
6644 + (VecElemU64)bits(vcc.rawData(), lane, lane))
6645 >= 0x100000000ULL ? 1 : 0);
6646 }
6647 }
6648
6649 vdst.write();
6650 vcc.write();
6651 }
6652
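// v_add_u32 and v_addc_u32 chain through VCC to build wider adds; a
// hedged scalar model of a 64-bit add split into 32-bit halves:
static inline VecElemU64
add64ChainSketch(VecElemU32 aLo, VecElemU32 aHi,
                 VecElemU32 bLo, VecElemU32 bHi)
{
    VecElemU64 lo = (VecElemU64)aLo + bLo;        // v_add_u32
    VecElemU32 carry = (VecElemU32)(lo >> 32);    // carry-out into VCC
    VecElemU32 hi = aHi + bHi + carry;            // v_addc_u32
    return ((VecElemU64)hi << 32) | (VecElemU32)lo;
}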
6653 Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt)
6654 : Inst_VOP2(iFmt, "v_subb_u32")
6655 {
6656 setFlag(ALU);
6657 setFlag(WritesVCC);
6658 setFlag(ReadsVCC);
6659 } // Inst_VOP2__V_SUBB_U32
6660
6661 Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32()
6662 {
6663 } // ~Inst_VOP2__V_SUBB_U32
6664
6665 // D.u = S0.u - S1.u - VCC[threadId];
6666 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
6667 // overflow.
6668 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
6669 // source comes from the SGPR-pair at S2.u.
6670 void
6671 Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
6672 {
6673 Wavefront *wf = gpuDynInst->wavefront();
6674 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6675 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6676 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6677 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6678
6679 src0.readSrc();
6680 src1.read();
6681 vcc.read();
6682
6683 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6684 if (wf->execMask(lane)) {
6685 vdst[lane]
6686 = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
6687 vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
6688 > src0[lane] ? 1 : 0);
6689 }
6690 }
6691
6692 vdst.write();
6693 vcc.write();
6694 }
6695
6696 Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt)
6697 : Inst_VOP2(iFmt, "v_subbrev_u32")
6698 {
6699 setFlag(ALU);
6700 setFlag(WritesVCC);
6701 setFlag(ReadsVCC);
6702 } // Inst_VOP2__V_SUBBREV_U32
6703
6704 Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32()
6705 {
6706 } // ~Inst_VOP2__V_SUBBREV_U32
6707
6708 // D.u = S1.u - S0.u - VCC[threadId];
6709 // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
6710 // overflow.
6711 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
6712 // source comes from the SGPR-pair at S2.u.
6713 void
6714 Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
6715 {
6716 Wavefront *wf = gpuDynInst->wavefront();
6717 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6718 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6719 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6720 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6721
6722 src0.readSrc();
6723 src1.read();
6724 vcc.read();
6725
6726 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6727 if (wf->execMask(lane)) {
6728 vdst[lane]
6729 = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
6730 vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
6731 > src1[lane] ? 1 : 0);
6732 }
6733 }
6734
6735 vdst.write();
6736 vcc.write();
6737 }
6738
6739 Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
6740 : Inst_VOP2(iFmt, "v_add_f16")
6741 {
6742 setFlag(ALU);
6743 setFlag(F16);
6744 } // Inst_VOP2__V_ADD_F16
6745
6746 Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
6747 {
6748 } // ~Inst_VOP2__V_ADD_F16
6749
6750 // D.f16 = S0.f16 + S1.f16.
6751 void
6752 Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
6753 {
6754 panicUnimplemented();
6755 }
6756
6757 Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
6758 : Inst_VOP2(iFmt, "v_sub_f16")
6759 {
6760 setFlag(ALU);
6761 setFlag(F16);
6762 } // Inst_VOP2__V_SUB_F16
6763
6764 Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
6765 {
6766 } // ~Inst_VOP2__V_SUB_F16
6767
6768 // D.f16 = S0.f16 - S1.f16.
6769 void
6770 Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
6771 {
6772 panicUnimplemented();
6773 }
6774
6775 Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
6776 : Inst_VOP2(iFmt, "v_subrev_f16")
6777 {
6778 setFlag(ALU);
6779 setFlag(F16);
6780 } // Inst_VOP2__V_SUBREV_F16
6781
6782 Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
6783 {
6784 } // ~Inst_VOP2__V_SUBREV_F16
6785
6786 // D.f16 = S1.f16 - S0.f16.
6787 void
6788 Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
6789 {
6790 panicUnimplemented();
6791 }
6792
6793 Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
6794 : Inst_VOP2(iFmt, "v_mul_f16")
6795 {
6796 setFlag(ALU);
6797 setFlag(F16);
6798 } // Inst_VOP2__V_MUL_F16
6799
6800 Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
6801 {
6802 } // ~Inst_VOP2__V_MUL_F16
6803
6804 // D.f16 = S0.f16 * S1.f16.
6805 void
6806 Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
6807 {
6808 panicUnimplemented();
6809 }
6810
6811 Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
6812 : Inst_VOP2(iFmt, "v_mac_f16")
6813 {
6814 setFlag(ALU);
6815 setFlag(F16);
6816 setFlag(MAC);
6817 } // Inst_VOP2__V_MAC_F16
6818
6819 Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
6820 {
6821 } // ~Inst_VOP2__V_MAC_F16
6822
6823 // D.f16 = S0.f16 * S1.f16 + D.f16.
6824 void
6825 Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
6826 {
6827 panicUnimplemented();
6828 }
6829
6830 Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
6831 : Inst_VOP2(iFmt, "v_madmk_f16")
6832 {
6833 setFlag(ALU);
6834 setFlag(F16);
6835 setFlag(MAD);
6836 } // Inst_VOP2__V_MADMK_F16
6837
6838 Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
6839 {
6840 } // ~Inst_VOP2__V_MADMK_F16
6841
6842 // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
6843 // in the following literal DWORD.
6844 // This opcode cannot use the VOP3 encoding and cannot use input/output
6845 // modifiers.
6846 void
6847 Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
6848 {
6849 panicUnimplemented();
6850 }
6851
6852 Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
6853 : Inst_VOP2(iFmt, "v_madak_f16")
6854 {
6855 setFlag(ALU);
6856 setFlag(F16);
6857 setFlag(MAD);
6858 } // Inst_VOP2__V_MADAK_F16
6859
6860 Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
6861 {
6862 } // ~Inst_VOP2__V_MADAK_F16
6863
6864 // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
6865 // in the following literal DWORD.
6866 // This opcode cannot use the VOP3 encoding and cannot use input/output
6867 // modifiers.
6868 void
6869 Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
6870 {
6871 panicUnimplemented();
6872 }
6873
6874 Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
6875 : Inst_VOP2(iFmt, "v_add_u16")
6876 {
6877 setFlag(ALU);
6878 } // Inst_VOP2__V_ADD_U16
6879
6880 Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
6881 {
6882 } // ~Inst_VOP2__V_ADD_U16
6883
6884 // D.u16 = S0.u16 + S1.u16.
6885 void
6886 Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
6887 {
6888 Wavefront *wf = gpuDynInst->wavefront();
6889 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6890 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6891 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6892
6893 src0.readSrc();
6894 src1.read();
6895
6896 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6897 if (wf->execMask(lane)) {
6898 vdst[lane] = src0[lane] + src1[lane];
6899 }
6900 }
6901
6902 vdst.write();
6903 }
6904
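// The 16-bit lanes above wrap modulo 2^16: the uint16 operands promote
// to int for the add and the store back into a VecElemU16 element
// truncates. A single-lane sketch:
static inline VecElemU16
addU16Sketch(VecElemU16 a, VecElemU16 b)
{
    return (VecElemU16)(a + b); // e.g. 0xFFFF + 2 -> 0x0001
}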
6905 Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
6906 : Inst_VOP2(iFmt, "v_sub_u16")
6907 {
6908 setFlag(ALU);
6909 } // Inst_VOP2__V_SUB_U16
6910
6911 Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
6912 {
6913 } // ~Inst_VOP2__V_SUB_U16
6914
6915 // D.u16 = S0.u16 - S1.u16.
6916 void
6917 Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
6918 {
6919 Wavefront *wf = gpuDynInst->wavefront();
6920 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6921 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6922 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6923
6924 src0.readSrc();
6925 src1.read();
6926
6927 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6928 if (wf->execMask(lane)) {
6929 vdst[lane] = src0[lane] - src1[lane];
6930 }
6931 }
6932
6933 vdst.write();
6934 }
6935
6936 Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
6937 : Inst_VOP2(iFmt, "v_subrev_u16")
6938 {
6939 setFlag(ALU);
6940 } // Inst_VOP2__V_SUBREV_U16
6941
6942 Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
6943 {
6944 } // ~Inst_VOP2__V_SUBREV_U16
6945
6946 // D.u16 = S1.u16 - S0.u16.
6947 void
6948 Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
6949 {
6950 Wavefront *wf = gpuDynInst->wavefront();
6951 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6952 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6953 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6954
6955 src0.readSrc();
6956 src1.read();
6957
6958 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6959 if (wf->execMask(lane)) {
6960 vdst[lane] = src1[lane] - src0[lane];
6961 }
6962 }
6963
6964 vdst.write();
6965 }
6966
6967 Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
6968 : Inst_VOP2(iFmt, "v_mul_lo_u16")
6969 {
6970 setFlag(ALU);
6971 } // Inst_VOP2__V_MUL_LO_U16
6972
6973 Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
6974 {
6975 } // ~Inst_VOP2__V_MUL_LO_U16
6976
6977 // D.u16 = S0.u16 * S1.u16.
6978 void
6979 Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
6980 {
6981 Wavefront *wf = gpuDynInst->wavefront();
6982 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6983 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6984 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6985
6986 src0.readSrc();
6987 src1.read();
6988
6989 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6990 if (wf->execMask(lane)) {
6991 vdst[lane] = src0[lane] * src1[lane];
6992 }
6993 }
6994
6995 vdst.write();
6996 }
6997
6998 Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt)
6999 : Inst_VOP2(iFmt, "v_lshlrev_b16")
7000 {
7001 setFlag(ALU);
7002 } // Inst_VOP2__V_LSHLREV_B16
7003
7004 Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
7005 {
7006 } // ~Inst_VOP2__V_LSHLREV_B16
7007
7008 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
7009 void
7010 Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
7011 {
7012 Wavefront *wf = gpuDynInst->wavefront();
7013 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7014 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7015 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7016
7017 src0.readSrc();
7018 src1.read();
7019
7020 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7021 if (wf->execMask(lane)) {
7022 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
7023 }
7024 }
7025
7026 vdst.write();
7027 }
7028
7029 Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
7030 : Inst_VOP2(iFmt, "v_lshrrev_b16")
7031 {
7032 setFlag(ALU);
7033 } // Inst_VOP2__V_LSHRREV_B16
7034
7035 Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
7036 {
7037 } // ~Inst_VOP2__V_LSHRREV_B16
7038
7039 // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
7040 // The vacated bits are set to zero.
7041 void
7042 Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
7043 {
7044 Wavefront *wf = gpuDynInst->wavefront();
7045 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7046 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7047 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7048
7049 src0.readSrc();
7050 src1.read();
7051
7052 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7053 if (wf->execMask(lane)) {
7054 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
7055 }
7056 }
7057
7058 vdst.write();
7059 }
7060
7061 Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
7062 : Inst_VOP2(iFmt, "v_ashrrev_i16")
7063 {
7064 setFlag(ALU);
7065 } // Inst_VOP2__V_ASHRREV_I16
7066
7067 Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
7068 {
7069 } // ~Inst_VOP2__V_ASHRREV_I16
7070
7071 // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
7072 // The vacated bits are set to the sign bit of the input value.
7073 void
7074 Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
7075 {
7076 Wavefront *wf = gpuDynInst->wavefront();
7077 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7078 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
7079 VecOperandI16 vdst(gpuDynInst, instData.VDST);
7080
7081 src0.readSrc();
7082 src1.read();
7083
7084 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7085 if (wf->execMask(lane)) {
7086 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
7087 }
7088 }
7089
7090 vdst.write();
7091 }
7092
7093 Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
7094 : Inst_VOP2(iFmt, "v_max_f16")
7095 {
7096 setFlag(ALU);
7097 setFlag(F16);
7098 } // Inst_VOP2__V_MAX_F16
7099
7100 Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
7101 {
7102 } // ~Inst_VOP2__V_MAX_F16
7103
7104 // D.f16 = max(S0.f16, S1.f16).
7105 void
7106 Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
7107 {
7108 panicUnimplemented();
7109 }
7110
7111 Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
7112 : Inst_VOP2(iFmt, "v_min_f16")
7113 {
7114 setFlag(ALU);
7115 setFlag(F16);
7116 } // Inst_VOP2__V_MIN_F16
7117
7118 Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
7119 {
7120 } // ~Inst_VOP2__V_MIN_F16
7121
7122 // D.f16 = min(S0.f16, S1.f16).
7123 void
7124 Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
7125 {
7126 panicUnimplemented();
7127 }
7128
7129 Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
7130 : Inst_VOP2(iFmt, "v_max_u16")
7131 {
7132 setFlag(ALU);
7133 } // Inst_VOP2__V_MAX_U16
7134
7135 Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
7136 {
7137 } // ~Inst_VOP2__V_MAX_U16
7138
7139 // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
7140 void
7141 Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
7142 {
7143 Wavefront *wf = gpuDynInst->wavefront();
7144 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7145 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7146 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7147
7148 src0.readSrc();
7149 src1.read();
7150
7151 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7152 if (wf->execMask(lane)) {
7153 vdst[lane] = std::max(src0[lane], src1[lane]);
7154 }
7155 }
7156
7157 vdst.write();
7158 }
7159
7160 Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
7161 : Inst_VOP2(iFmt, "v_max_i16")
7162 {
7163 setFlag(ALU);
7164 } // Inst_VOP2__V_MAX_I16
7165
7166 Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
7167 {
7168 } // ~Inst_VOP2__V_MAX_I16
7169
7170 // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
7171 void
7172 Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
7173 {
7174 Wavefront *wf = gpuDynInst->wavefront();
7175 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
7176 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
7177 VecOperandI16 vdst(gpuDynInst, instData.VDST);
7178
7179 src0.readSrc();
7180 src1.read();
7181
7182 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7183 if (wf->execMask(lane)) {
7184 vdst[lane] = std::max(src0[lane], src1[lane]);
7185 }
7186 }
7187
7188 vdst.write();
7189 }
7190
7191 Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
7192 : Inst_VOP2(iFmt, "v_min_u16")
7193 {
7194 setFlag(ALU);
7195 } // Inst_VOP2__V_MIN_U16
7196
7197 Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
7198 {
7199 } // ~Inst_VOP2__V_MIN_U16
7200
7201 // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
7202 void
7203 Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
7204 {
7205 Wavefront *wf = gpuDynInst->wavefront();
7206 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7207 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7208 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7209
7210 src0.readSrc();
7211 src1.read();
7212
7213 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7214 if (wf->execMask(lane)) {
7215 vdst[lane] = std::min(src0[lane], src1[lane]);
7216 }
7217 }
7218
7219 vdst.write();
7220 }
7221
7222 Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
7223 : Inst_VOP2(iFmt, "v_min_i16")
7224 {
7225 setFlag(ALU);
7226 } // Inst_VOP2__V_MIN_I16
7227
7228 Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
7229 {
7230 } // ~Inst_VOP2__V_MIN_I16
7231
7232 // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
7233 void
7234 Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
7235 {
7236 Wavefront *wf = gpuDynInst->wavefront();
7237 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
7238 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
7239 VecOperandI16 vdst(gpuDynInst, instData.VDST);
7240
7241 src0.readSrc();
7242 src1.read();
7243
7244 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7245 if (wf->execMask(lane)) {
7246 vdst[lane] = std::min(src0[lane], src1[lane]);
7247 }
7248 }
7249
7250 vdst.write();
7251 }
7252
7253 Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
7254 : Inst_VOP2(iFmt, "v_ldexp_f16")
7255 {
7256 setFlag(ALU);
7257 setFlag(F16);
7258 } // Inst_VOP2__V_LDEXP_F16
7259
7260 Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
7261 {
7262 } // ~Inst_VOP2__V_LDEXP_F16
7263
7264 // D.f16 = S0.f16 * (2 ** S1.i16).
7265 void
7266 Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
7267 {
7268 panicUnimplemented();
7269 }
7270
7271 Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
7272 : Inst_VOP1(iFmt, "v_nop")
7273 {
7274 setFlag(Nop);
7275 setFlag(ALU);
7276 } // Inst_VOP1__V_NOP
7277
7278 Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP()
7279 {
7280 } // ~Inst_VOP1__V_NOP
7281
7282 // Do nothing.
7283 void
7284 Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
7285 {
7286 }
7287
7288 Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
7289 : Inst_VOP1(iFmt, "v_mov_b32")
7290 {
7291 setFlag(ALU);
7292 } // Inst_VOP1__V_MOV_B32
7293
7294 Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32()
7295 {
7296 } // ~Inst_VOP1__V_MOV_B32
7297
7298 // D.u = S0.u.
7299 // Input and output modifiers not supported; this is an untyped operation.
7300 void
7301 Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
7302 {
7303 Wavefront *wf = gpuDynInst->wavefront();
7304 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7305 VecOperandU32 vdst(gpuDynInst, instData.VDST);
7306
7307 src.readSrc();
7308
7309 if (isDPPInst()) {
7310 VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
7311 src_dpp.read();
7312
7313 DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
7314 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
7315 "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
7316 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
7317 extData.iFmt_VOP_DPP.DPP_CTRL,
7318 extData.iFmt_VOP_DPP.SRC0_ABS,
7319 extData.iFmt_VOP_DPP.SRC0_NEG,
7320 extData.iFmt_VOP_DPP.SRC1_ABS,
7321 extData.iFmt_VOP_DPP.SRC1_NEG,
7322 extData.iFmt_VOP_DPP.BOUND_CTRL,
7323 extData.iFmt_VOP_DPP.BANK_MASK,
7324 extData.iFmt_VOP_DPP.ROW_MASK);
7325
7326 // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
7327 // to negate it or take the absolute value of it
7328 assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
7329 assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
7330 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);
7331
7332 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7333 if (wf->execMask(lane)) {
7334 vdst[lane] = src_dpp[lane];
7335 }
7336 }
7337 } else {
7338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7339 if (wf->execMask(lane)) {
7340 vdst[lane] = src[lane];
7341 }
7342 }
7343 }
7344
7345 vdst.write();
7346 }
7347
7348 Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
7349 InFmt_VOP1 *iFmt)
7350 : Inst_VOP1(iFmt, "v_readfirstlane_b32")
7351 {
7352 setFlag(ALU);
7353 } // Inst_VOP1__V_READFIRSTLANE_B32
7354
7355 Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32()
7356 {
7357 } // ~Inst_VOP1__V_READFIRSTLANE_B32
7358
7359 // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
7360 // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
7361 // (Lane# = 0 if exec is zero). Ignores exec mask for the access.
7362 // Input and output modifiers not supported; this is an untyped operation.
7363 void
7364 Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
7365 {
7366 Wavefront *wf = gpuDynInst->wavefront();
7367 ScalarRegI32 src_lane(0);
7368 ScalarRegU64 exec_mask = wf->execMask().to_ullong();
7369 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7370 ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
7371
7372 src.readSrc();
7373
7374 if (exec_mask) {
7375 src_lane = findLsbSet(exec_mask);
7376 }
7377
7378 sdst = src[src_lane];
7379
7380 sdst.write();
7381 }
7382
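// Sketch of the lane selection above: findLsbSet returns the index of
// the least-significant set bit of the exec mask, i.e. the first active
// lane, and lane 0 is used when no lane is active:
static inline int
firstActiveLaneSketch(ScalarRegU64 execMask)
{
    return execMask ? findLsbSet(execMask) : 0;
}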
7383 Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
7384 : Inst_VOP1(iFmt, "v_cvt_i32_f64")
7385 {
7386 setFlag(ALU);
7387 setFlag(F64);
7388 } // Inst_VOP1__V_CVT_I32_F64
7389
7390 Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
7391 {
7392 } // ~Inst_VOP1__V_CVT_I32_F64
7393
7394 // D.i = (int)S0.d.
7395 // Out-of-range floating point values (including infinity) saturate. NaN
7396 // is converted to 0.
7397 void
7398 Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
7399 {
7400 Wavefront *wf = gpuDynInst->wavefront();
7401 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
7402 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7403
7404 src.readSrc();
7405
7406 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7407 if (wf->execMask(lane)) {
7408 int exp;
7409 std::frexp(src[lane], &exp);
7410 if (std::isnan(src[lane])) {
7411 vdst[lane] = 0;
7412 } else if (std::isinf(src[lane]) || exp > 31) {
7413 if (std::signbit(src[lane])) {
7414 vdst[lane] = INT_MIN;
7415 } else {
7416 vdst[lane] = INT_MAX;
7417 }
7418 } else {
7419 vdst[lane] = (VecElemI32)src[lane];
7420 }
7421 }
7422 }
7423
7424 vdst.write();
7425 }
7426
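// The frexp range check above uses the identity 2^(exp-1) <= |x| <
// 2^exp: any finite double reporting exp <= 31 truncates to a value
// representable in an int32 (and -2^31, which reports exp == 32,
// saturates to the identical INT_MIN). A standalone form:
static inline bool
fitsInI32Sketch(VecElemF64 x)
{
    int exp;
    std::frexp(x, &exp);
    return !std::isnan(x) && !std::isinf(x) && exp <= 31;
}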
7427 Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
7428 : Inst_VOP1(iFmt, "v_cvt_f64_i32")
7429 {
7430 setFlag(ALU);
7431 setFlag(F64);
7432 } // Inst_VOP1__V_CVT_F64_I32
7433
7434 Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
7435 {
7436 } // ~Inst_VOP1__V_CVT_F64_I32
7437
7438 // D.d = (double)S0.i.
7439 void
7440 Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
7441 {
7442 Wavefront *wf = gpuDynInst->wavefront();
7443 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
7444 VecOperandF64 vdst(gpuDynInst, instData.VDST);
7445
7446 src.readSrc();
7447
7448 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7449 if (wf->execMask(lane)) {
7450 vdst[lane] = (VecElemF64)src[lane];
7451 }
7452 }
7453
7454 vdst.write();
7455 }
7456
7457 Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
7458 : Inst_VOP1(iFmt, "v_cvt_f32_i32")
7459 {
7460 setFlag(ALU);
7461 setFlag(F32);
7462 } // Inst_VOP1__V_CVT_F32_I32
7463
7464 Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
7465 {
7466 } // ~Inst_VOP1__V_CVT_F32_I32
7467
7468 // D.f = (float)S0.i.
7469 void
7470 Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
7471 {
7472 Wavefront *wf = gpuDynInst->wavefront();
7473 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
7474 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7475
7476 src.readSrc();
7477
7478 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7479 if (wf->execMask(lane)) {
7480 vdst[lane] = (VecElemF32)src[lane];
7481 }
7482 }
7483
7484 vdst.write();
7485 }
7486
7487 Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
7488 : Inst_VOP1(iFmt, "v_cvt_f32_u32")
7489 {
7490 setFlag(ALU);
7491 setFlag(F32);
7492 } // Inst_VOP1__V_CVT_F32_U32
7493
7494 Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
7495 {
7496 } // ~Inst_VOP1__V_CVT_F32_U32
7497
7498 // D.f = (float)S0.u.
7499 void
7500 Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
7501 {
7502 Wavefront *wf = gpuDynInst->wavefront();
7503 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7504 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7505
7506 src.readSrc();
7507
7508 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7509 if (wf->execMask(lane)) {
7510 vdst[lane] = (VecElemF32)src[lane];
7511 }
7512 }
7513
7514 vdst.write();
7515 }
7516
7517 Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
7518 : Inst_VOP1(iFmt, "v_cvt_u32_f32")
7519 {
7520 setFlag(ALU);
7521 setFlag(F32);
7522 } // Inst_VOP1__V_CVT_U32_F32
7523
7524 Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
7525 {
7526 } // ~Inst_VOP1__V_CVT_U32_F32
7527
7528 // D.u = (unsigned)S0.f.
7529 // Out-of-range floating point values (including infinity) saturate. NaN
7530 // is converted to 0.
7531 void
7532 Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
7533 {
7534 Wavefront *wf = gpuDynInst->wavefront();
7535 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7536 VecOperandU32 vdst(gpuDynInst, instData.VDST);
7537
7538 src.readSrc();
7539
7540 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7541 if (wf->execMask(lane)) {
7542 int exp;
7543 std::frexp(src[lane], &exp);
7544 if (std::isnan(src[lane])) {
7545 vdst[lane] = 0;
7546 } else if (std::signbit(src[lane])) {
7547 // negative values (including -inf) clamp to 0
7548 vdst[lane] = 0;
7549 } else if (std::isinf(src[lane]) || exp > 32) {
7550 // +inf and finite values >= 2^32 saturate
7551 vdst[lane] = UINT_MAX;
7552 } else {
7553 vdst[lane] = (VecElemU32)src[lane];
7554 }
7557 }
7558 }
7559
7560 vdst.write();
7561 }
7562
7563 Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
7564 : Inst_VOP1(iFmt, "v_cvt_i32_f32")
7565 {
7566 setFlag(ALU);
7567 setFlag(F32);
7568 } // Inst_VOP1__V_CVT_I32_F32
7569
7570 Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
7571 {
7572 } // ~Inst_VOP1__V_CVT_I32_F32
7573
7574 // D.i = (int)S0.f.
7575 // Out-of-range floating point values (including infinity) saturate. NaN
7576 // is converted to 0.
7577 void
7578 Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
7579 {
7580 Wavefront *wf = gpuDynInst->wavefront();
7581 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7582 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7583
7584 src.readSrc();
7585
7586 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7587 if (wf->execMask(lane)) {
7588 int exp;
7589 std::frexp(src[lane], &exp);
7590 if (std::isnan(src[lane])) {
7591 vdst[lane] = 0;
7592 } else if (std::isinf(src[lane]) || exp > 31) {
7593 if (std::signbit(src[lane])) {
7594 vdst[lane] = INT_MIN;
7595 } else {
7596 vdst[lane] = INT_MAX;
7597 }
7598 } else {
7599 vdst[lane] = (VecElemI32)src[lane];
7600 }
7601 }
7602 }
7603
7604 vdst.write();
7605 }
7606
7607 Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
7608 : Inst_VOP1(iFmt, "v_mov_fed_b32")
7609 {
7610 setFlag(ALU);
7611 } // Inst_VOP1__V_MOV_FED_B32
7612
7613 Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
7614 {
7615 } // ~Inst_VOP1__V_MOV_FED_B32
7616
7617 // D.u = S0.u;
7618 // Input and output modifiers not supported; this is an untyped operation.
7619 void
7620 Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
7621 {
7622 panicUnimplemented();
7623 }
7624
7625 Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
7626 : Inst_VOP1(iFmt, "v_cvt_f16_f32")
7627 {
7628 setFlag(ALU);
7629 setFlag(F32);
7630 } // Inst_VOP1__V_CVT_F16_F32
7631
7632 Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
7633 {
7634 } // ~Inst_VOP1__V_CVT_F16_F32
7635
7636 // D.f16 = flt32_to_flt16(S0.f).
7637 void
7638 Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
7639 {
7640 panicUnimplemented();
7641 }
7642
7643 Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
7644 : Inst_VOP1(iFmt, "v_cvt_f32_f16")
7645 {
7646 setFlag(ALU);
7647 setFlag(F32);
7648 } // Inst_VOP1__V_CVT_F32_F16
7649
7650 Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
7651 {
7652 } // ~Inst_VOP1__V_CVT_F32_F16
7653
7654 // D.f = flt16_to_flt32(S0.f16).
7655 void
7656 Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
7657 {
7658 panicUnimplemented();
7659 }
7660
7661 Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
7662 InFmt_VOP1 *iFmt)
7663 : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
7664 {
7665 setFlag(ALU);
7666 setFlag(F32);
7667 } // Inst_VOP1__V_CVT_RPI_I32_F32
7668
7669 Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
7670 {
7671 } // ~Inst_VOP1__V_CVT_RPI_I32_F32
7672
7673 // D.i = (int)floor(S0.f + 0.5).
7674 void
7675 Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
7676 {
7677 Wavefront *wf = gpuDynInst->wavefront();
7678 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7679 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7680
7681 src.readSrc();
7682
7683 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7684 if (wf->execMask(lane)) {
7685 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
7686 }
7687 }
7688
7689 vdst.write();
7690 }
7691
7692 Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
7693 InFmt_VOP1 *iFmt)
7694 : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
7695 {
7696 setFlag(ALU);
7697 setFlag(F32);
7698 } // Inst_VOP1__V_CVT_FLR_I32_F32
7699
7700 Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
7701 {
7702 } // ~Inst_VOP1__V_CVT_FLR_I32_F32
7703
7704 // D.i = (int)floor(S0.f).
7705 void
7706 Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
7707 {
7708 Wavefront *wf = gpuDynInst->wavefront();
7709 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7710 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7711
7712 src.readSrc();
7713
7714 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7715 if (wf->execMask(lane)) {
7716 vdst[lane] = (VecElemI32)std::floor(src[lane]);
7717 }
7718 }
7719
7720 vdst.write();
7721 }
7722
7723 Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
7724 : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
7725 {
7726 setFlag(ALU);
7727 setFlag(F32);
7728 } // Inst_VOP1__V_CVT_OFF_F32_I4
7729
7730 Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
7731 {
7732 } // ~Inst_VOP1__V_CVT_OFF_F32_I4
7733
7734 // 4-bit signed int to 32-bit float.
7735 void
7736 Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
7737 {
7738 panicUnimplemented();
7739 }
7740
7741 Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
7742 : Inst_VOP1(iFmt, "v_cvt_f32_f64")
7743 {
7744 setFlag(ALU);
7745 setFlag(F64);
7746 } // Inst_VOP1__V_CVT_F32_F64
7747
7748 Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
7749 {
7750 } // ~Inst_VOP1__V_CVT_F32_F64
7751
7752 // D.f = (float)S0.d.
7753 void
7754 Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
7755 {
7756 Wavefront *wf = gpuDynInst->wavefront();
7757 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
7758 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7759
7760 src.readSrc();
7761
7762 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7763 if (wf->execMask(lane)) {
7764 vdst[lane] = (VecElemF32)src[lane];
7765 }
7766 }
7767
7768 vdst.write();
7769 }
7770
7771 Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
7772 : Inst_VOP1(iFmt, "v_cvt_f64_f32")
7773 {
7774 setFlag(ALU);
7775 setFlag(F64);
7776 } // Inst_VOP1__V_CVT_F64_F32
7777
7778 Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32()
7779 {
7780 } // ~Inst_VOP1__V_CVT_F64_F32
7781
7782 // D.d = (double)S0.f.
7783 void
7784 Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
7785 {
7786 Wavefront *wf = gpuDynInst->wavefront();
7787 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7788 VecOperandF64 vdst(gpuDynInst, instData.VDST);
7789
7790 src.readSrc();
7791
7792 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7793 if (wf->execMask(lane)) {
7794 vdst[lane] = (VecElemF64)src[lane];
7795 }
7796 }
7797
7798 vdst.write();
7799 }
7800
7801 Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
7802 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
7803 {
7804 setFlag(ALU);
7805 setFlag(F32);
7806 } // Inst_VOP1__V_CVT_F32_UBYTE0
7807
7808 Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0()
7809 {
7810 } // ~Inst_VOP1__V_CVT_F32_UBYTE0
7811
7812 // D.f = (float)(S0.u[7:0]).
7813 void
7814 Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
7815 {
7816 Wavefront *wf = gpuDynInst->wavefront();
7817 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7818 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7819
7820 src.readSrc();
7821
7822 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7823 if (wf->execMask(lane)) {
7824 vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
7825 }
7826 }
7827
7828 vdst.write();
7829 }
7830
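// The four v_cvt_f32_ubyteN opcodes unpack one byte each from a packed
// 32-bit value (e.g. an RGBA8 texel) into floats; a sketch
// parameterized by the byte index n in [0, 3]:
static inline VecElemF32
cvtF32UByteSketch(VecElemU32 packed, int n)
{
    return (VecElemF32)bits(packed, 8 * n + 7, 8 * n);
}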
7831 Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
7832 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
7833 {
7834 setFlag(ALU);
7835 setFlag(F32);
7836 } // Inst_VOP1__V_CVT_F32_UBYTE1
7837
7838 Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1()
7839 {
7840 } // ~Inst_VOP1__V_CVT_F32_UBYTE1
7841
7842 // D.f = (float)(S0.u[15:8]).
7843 void
7844 Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
7845 {
7846 Wavefront *wf = gpuDynInst->wavefront();
7847 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7848 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7849
7850 src.readSrc();
7851
7852 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7853 if (wf->execMask(lane)) {
7854 vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
7855 }
7856 }
7857
7858 vdst.write();
7859 }
7860
7861 Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
7862 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
7863 {
7864 setFlag(ALU);
7865 setFlag(F32);
7866 } // Inst_VOP1__V_CVT_F32_UBYTE2
7867
7868 Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2()
7869 {
7870 } // ~Inst_VOP1__V_CVT_F32_UBYTE2
7871
7872 // D.f = (float)(S0.u[23:16]).
7873 void
7874 Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
7875 {
7876 Wavefront *wf = gpuDynInst->wavefront();
7877 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7878 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7879
7880 src.readSrc();
7881
7882 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7883 if (wf->execMask(lane)) {
7884 vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
7885 }
7886 }
7887
7888 vdst.write();
7889 }
7890
7891 Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
7892 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
7893 {
7894 setFlag(ALU);
7895 setFlag(F32);
7896 } // Inst_VOP1__V_CVT_F32_UBYTE3
7897
7898 Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3()
7899 {
7900 } // ~Inst_VOP1__V_CVT_F32_UBYTE3
7901
7902 // D.f = (float)(S0.u[31:24]).
7903 void
7904 Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
7905 {
7906 Wavefront *wf = gpuDynInst->wavefront();
7907 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7908 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7909
7910 src.readSrc();
7911
7912 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7913 if (wf->execMask(lane)) {
7914 vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
7915 }
7916 }
7917
7918 vdst.write();
7919 }
7920
7921 Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
7922 : Inst_VOP1(iFmt, "v_cvt_u32_f64")
7923 {
7924 setFlag(ALU);
7925 setFlag(F64);
7926 } // Inst_VOP1__V_CVT_U32_F64
7927
7928 Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64()
7929 {
7930 } // ~Inst_VOP1__V_CVT_U32_F64
7931
7932 // D.u = (unsigned)S0.d.
7933 // Out-of-range floating point values (including infinity) saturate. NaN
7934 // is converted to 0.
7935 void
7936 Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
7937 {
7938 Wavefront *wf = gpuDynInst->wavefront();
7939 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
7940 VecOperandU32 vdst(gpuDynInst, instData.VDST);
7941
7942 src.readSrc();
7943
7944 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7945 if (wf->execMask(lane)) {
7946 int exp;
7947 std::frexp(src[lane], &exp);
7948 if (std::isnan(src[lane])) {
7949 vdst[lane] = 0;
7950 } else if (std::signbit(src[lane])) {
7951 // negative values (including -inf) clamp to 0
7952 vdst[lane] = 0;
7953 } else if (std::isinf(src[lane]) || exp > 32) {
7954 // +inf and finite values >= 2^32 saturate
7955 vdst[lane] = UINT_MAX;
7956 } else {
7957 vdst[lane] = (VecElemU32)src[lane];
7958 }
7961 }
7962 }
7963
7964 vdst.write();
7965 }
7966
7967 Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
7968 : Inst_VOP1(iFmt, "v_cvt_f64_u32")
7969 {
7970 setFlag(ALU);
7971 setFlag(F64);
7972 } // Inst_VOP1__V_CVT_F64_U32
7973
7974 Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
7975 {
7976 } // ~Inst_VOP1__V_CVT_F64_U32
7977
7978 // D.d = (double)S0.u.
7979 void
7980 Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
7981 {
7982 Wavefront *wf = gpuDynInst->wavefront();
7983 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7984 VecOperandF64 vdst(gpuDynInst, instData.VDST);
7985
7986 src.readSrc();
7987
7988 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7989 if (wf->execMask(lane)) {
7990 vdst[lane] = (VecElemF64)src[lane];
7991 }
7992 }
7993
7994 vdst.write();
7995 }
7996
7997 Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
7998 : Inst_VOP1(iFmt, "v_trunc_f64")
7999 {
8000 setFlag(ALU);
8001 setFlag(F64);
8002 } // Inst_VOP1__V_TRUNC_F64
8003
8004 Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
8005 {
8006 } // ~Inst_VOP1__V_TRUNC_F64
8007
8008 // D.d = trunc(S0.d), return integer part of S0.d.
8009 void
8010 Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
8011 {
8012 Wavefront *wf = gpuDynInst->wavefront();
8013 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8014 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8015
8016 src.readSrc();
8017
8018 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8019 if (wf->execMask(lane)) {
8020 vdst[lane] = std::trunc(src[lane]);
8021 }
8022 }
8023
8024 vdst.write();
8025 }
8026
8027 Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
8028 : Inst_VOP1(iFmt, "v_ceil_f64")
8029 {
8030 setFlag(ALU);
8031 setFlag(F64);
8032 } // Inst_VOP1__V_CEIL_F64
8033
8034 Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
8035 {
8036 } // ~Inst_VOP1__V_CEIL_F64
8037
8038 // D.d = ceil(S0.d);
8039 void
8040 Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
8041 {
8042 Wavefront *wf = gpuDynInst->wavefront();
8043 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8044 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8045
8046 src.readSrc();
8047
8048 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8049 if (wf->execMask(lane)) {
8050 vdst[lane] = std::ceil(src[lane]);
8051 }
8052 }
8053
8054 vdst.write();
8055 }
8056
8057 Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
8058 : Inst_VOP1(iFmt, "v_rndne_f64")
8059 {
8060 setFlag(ALU);
8061 setFlag(F64);
8062 } // Inst_VOP1__V_RNDNE_F64
8063
8064 Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
8065 {
8066 } // ~Inst_VOP1__V_RNDNE_F64
8067
8068 // D.d = round_nearest_even(S0.d).
8069 void
8070 Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
8071 {
8072 Wavefront *wf = gpuDynInst->wavefront();
8073 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8074 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8075
8076 src.readSrc();
8077
8078 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8079 if (wf->execMask(lane)) {
8080 vdst[lane] = roundNearestEven(src[lane]);
8081 }
8082 }
8083
8084 vdst.write();
8085 }
8086
8087 Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
8088 : Inst_VOP1(iFmt, "v_floor_f64")
8089 {
8090 setFlag(ALU);
8091 setFlag(F64);
8092 } // Inst_VOP1__V_FLOOR_F64
8093
8094 Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
8095 {
8096 } // ~Inst_VOP1__V_FLOOR_F64
8097
8098 // D.d = floor(S0.d);
8099 void
8100 Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
8101 {
8102 Wavefront *wf = gpuDynInst->wavefront();
8103 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8104 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8105
8106 src.readSrc();
8107
8108 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8109 if (wf->execMask(lane)) {
8110 vdst[lane] = std::floor(src[lane]);
8111 }
8112 }
8113
8114 vdst.write();
8115 }
8116
8117 Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
8118 : Inst_VOP1(iFmt, "v_fract_f32")
8119 {
8120 setFlag(ALU);
8121 setFlag(F32);
8122 } // Inst_VOP1__V_FRACT_F32
8123
8124 Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
8125 {
8126 } // ~Inst_VOP1__V_FRACT_F32
8127
8128 // D.f = modf(S0.f).
8129 void
8130 Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
8131 {
8132 Wavefront *wf = gpuDynInst->wavefront();
8133 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8134 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8135
8136 src.readSrc();
8137
8138 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8139 if (wf->execMask(lane)) {
8140 VecElemF32 int_part(0.0);
8141 vdst[lane] = std::modf(src[lane], &int_part);
8142 }
8143 }
8144
8145 vdst.write();
8146 }
8147
8148 Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
8149 : Inst_VOP1(iFmt, "v_trunc_f32")
8150 {
8151 setFlag(ALU);
8152 setFlag(F32);
8153 } // Inst_VOP1__V_TRUNC_F32
8154
8155 Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32()
8156 {
8157 } // ~Inst_VOP1__V_TRUNC_F32
8158
8159 // D.f = trunc(S0.f), return integer part of S0.f.
8160 void
8161 Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
8162 {
8163 Wavefront *wf = gpuDynInst->wavefront();
8164 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
8166
8167 src.readSrc();
8168
8169 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8170 if (wf->execMask(lane)) {
8171 vdst[lane] = std::trunc(src[lane]);
8172 }
8173 }
8174
8175 vdst.write();
8176 }
8177
8178 Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
8179 : Inst_VOP1(iFmt, "v_ceil_f32")
8180 {
8181 setFlag(ALU);
8182 setFlag(F32);
8183 } // Inst_VOP1__V_CEIL_F32
8184
8185 Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32()
8186 {
8187 } // ~Inst_VOP1__V_CEIL_F32
8188
8189 // D.f = ceil(S0.f);
8190 void
8191 Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
8192 {
8193 Wavefront *wf = gpuDynInst->wavefront();
8194 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8195 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8196
8197 src.readSrc();
8198
8199 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8200 if (wf->execMask(lane)) {
8201 vdst[lane] = std::ceil(src[lane]);
8202 }
8203 }
8204
8205 vdst.write();
8206 }
8207
8208 Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
8209 : Inst_VOP1(iFmt, "v_rndne_f32")
8210 {
8211 setFlag(ALU);
8212 setFlag(F32);
8213 } // Inst_VOP1__V_RNDNE_F32
8214
8215 Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
8216 {
8217 } // ~Inst_VOP1__V_RNDNE_F32
8218
8219 // D.f = round_nearest_even(S0.f).
8220 void
8221 Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
8222 {
8223 Wavefront *wf = gpuDynInst->wavefront();
8224 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8225 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8226
8227 src.readSrc();
8228
8229 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8230 if (wf->execMask(lane)) {
8231 vdst[lane] = roundNearestEven(src[lane]);
8232 }
8233 }
8234
8235 vdst.write();
8236 }
8237
8238 Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
8239 : Inst_VOP1(iFmt, "v_floor_f32")
8240 {
8241 setFlag(ALU);
8242 setFlag(F32);
8243 } // Inst_VOP1__V_FLOOR_F32
8244
8245 Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
8246 {
8247 } // ~Inst_VOP1__V_FLOOR_F32
8248
8249 // D.f = floor(S0.f);
8250 void
8251 Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
8252 {
8253 Wavefront *wf = gpuDynInst->wavefront();
8254 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8255 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8256
8257 src.readSrc();
8258
8259 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8260 if (wf->execMask(lane)) {
8261 vdst[lane] = std::floor(src[lane]);
8262 }
8263 }
8264
8265 vdst.write();
8266 }
8267
8268 Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
8269 : Inst_VOP1(iFmt, "v_exp_f32")
8270 {
8271 setFlag(ALU);
8272 setFlag(F32);
8273 } // Inst_VOP1__V_EXP_F32
8274
8275 Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
8276 {
8277 } // ~Inst_VOP1__V_EXP_F32
8278
8279 // D.f = pow(2.0, S0.f).
8280 void
8281 Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
8282 {
8283 Wavefront *wf = gpuDynInst->wavefront();
8284 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8285 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8286
8287 src.readSrc();
8288
8289 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8290 if (wf->execMask(lane)) {
8291 vdst[lane] = std::pow(2.0, src[lane]);
8292 }
8293 }
8294
8295 vdst.write();
8296 }
8297
8298 Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
8299 : Inst_VOP1(iFmt, "v_log_f32")
8300 {
8301 setFlag(ALU);
8302 setFlag(F32);
8303 } // Inst_VOP1__V_LOG_F32
8304
8305 Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
8306 {
8307 } // ~Inst_VOP1__V_LOG_F32
8308
8309 // D.f = log2(S0.f).
8310 void
8311 Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
8312 {
8313 Wavefront *wf = gpuDynInst->wavefront();
8314 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8315 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8316
8317 src.readSrc();
8318
8319 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8320 if (wf->execMask(lane)) {
8321 vdst[lane] = std::log2(src[lane]);
8322 }
8323 }
8324
8325 vdst.write();
8326 }
8327
8328 Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
8329 : Inst_VOP1(iFmt, "v_rcp_f32")
8330 {
8331 setFlag(ALU);
8332 setFlag(F32);
8333 } // Inst_VOP1__V_RCP_F32
8334
8335 Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
8336 {
8337 } // ~Inst_VOP1__V_RCP_F32
8338
8339 // D.f = 1.0 / S0.f.
8340 void
8341 Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
8342 {
8343 Wavefront *wf = gpuDynInst->wavefront();
8344 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8345 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8346
8347 src.readSrc();
8348
8349 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8350 if (wf->execMask(lane)) {
8351 vdst[lane] = 1.0 / src[lane];
8352 }
8353 }
8354
8355 vdst.write();
8356 }
8357
8358 Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
8359 : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
8360 {
8361 setFlag(ALU);
8362 setFlag(F32);
8363 } // Inst_VOP1__V_RCP_IFLAG_F32
8364
8365 Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
8366 {
8367 } // ~Inst_VOP1__V_RCP_IFLAG_F32
8368
8369 // D.f = 1.0 / S0.f.
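    // The IFLAG variant is meant for the integer-division macro: on real
    // hardware it raises the integer divide-by-zero exception rather
    // than FP exceptions. Exceptions are not modeled here, so the
    // arithmetic matches v_rcp_f32.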
8370 void
8371 Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
8372 {
8373 Wavefront *wf = gpuDynInst->wavefront();
8374 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8375 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8376
8377 src.readSrc();
8378
8379 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8380 if (wf->execMask(lane)) {
8381 vdst[lane] = 1.0 / src[lane];
8382 }
8383 }
8384
8385 vdst.write();
8386 }
8387
8388 Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
8389 : Inst_VOP1(iFmt, "v_rsq_f32")
8390 {
8391 setFlag(ALU);
8392 setFlag(F32);
8393 } // Inst_VOP1__V_RSQ_F32
8394
8395 Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32()
8396 {
8397 } // ~Inst_VOP1__V_RSQ_F32
8398
8399 // D.f = 1.0 / sqrt(S0.f).
8400 void
8401 Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
8402 {
8403 Wavefront *wf = gpuDynInst->wavefront();
8404 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8405 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8406
8407 src.readSrc();
8408
8409 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8410 if (wf->execMask(lane)) {
8411 vdst[lane] = 1.0 / std::sqrt(src[lane]);
8412 }
8413 }
8414
8415 vdst.write();
8416 }
8417
8418 Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
8419 : Inst_VOP1(iFmt, "v_rcp_f64")
8420 {
8421 setFlag(ALU);
8422 setFlag(F64);
8423 } // Inst_VOP1__V_RCP_F64
8424
8425 Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64()
8426 {
8427 } // ~Inst_VOP1__V_RCP_F64
8428
8429 // D.d = 1.0 / S0.d.
8430 void
8431 Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
8432 {
8433 Wavefront *wf = gpuDynInst->wavefront();
8434 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8435 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8436
8437 src.readSrc();
8438
8439 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8440 if (wf->execMask(lane)) {
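                // Handle the IEEE special cases explicitly: a zero of
                // either sign gives +INF, NaN propagates, and +-INF
                // gives a zero of the same sign.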
8441 if (std::fpclassify(src[lane]) == FP_ZERO) {
8442 vdst[lane] = +INFINITY;
8443 } else if (std::isnan(src[lane])) {
8444 vdst[lane] = NAN;
8445 } else if (std::isinf(src[lane])) {
8446 if (std::signbit(src[lane])) {
8447 vdst[lane] = -0.0;
8448 } else {
8449 vdst[lane] = 0.0;
8450 }
8451 } else {
8452 vdst[lane] = 1.0 / src[lane];
8453 }
8454 }
8455 }
8456
8457 vdst.write();
8458 }
8459
8460 Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
8461 : Inst_VOP1(iFmt, "v_rsq_f64")
8462 {
8463 setFlag(ALU);
8464 setFlag(F64);
8465 } // Inst_VOP1__V_RSQ_F64
8466
8467 Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64()
8468 {
8469 } // ~Inst_VOP1__V_RSQ_F64
8470
8471 // D.d = 1.0 / sqrt(S0.d).
8472 void
8473 Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
8474 {
8475 Wavefront *wf = gpuDynInst->wavefront();
8476 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8477 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8478
8479 src.readSrc();
8480
8481 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8482 if (wf->execMask(lane)) {
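                // Special cases: a zero of either sign gives +INF, NaN
                // propagates, +INF gives +0, and any other negative
                // input (including -INF) gives NaN.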
8483 if (std::fpclassify(src[lane]) == FP_ZERO) {
8484 vdst[lane] = +INFINITY;
8485 } else if (std::isnan(src[lane])) {
8486 vdst[lane] = NAN;
8487 } else if (std::isinf(src[lane])
8488 && !std::signbit(src[lane])) {
8489 vdst[lane] = 0.0;
8490 } else if (std::signbit(src[lane])) {
8491 vdst[lane] = NAN;
8492 } else {
8493 vdst[lane] = 1.0 / std::sqrt(src[lane]);
8494 }
8495 }
8496 }
8497
8498 vdst.write();
8499 }
8500
8501 Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
8502 : Inst_VOP1(iFmt, "v_sqrt_f32")
8503 {
8504 setFlag(ALU);
8505 setFlag(F32);
8506 } // Inst_VOP1__V_SQRT_F32
8507
8508 Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
8509 {
8510 } // ~Inst_VOP1__V_SQRT_F32
8511
8512 // D.f = sqrt(S0.f).
8513 void
8514 Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
8515 {
8516 Wavefront *wf = gpuDynInst->wavefront();
8517 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8518 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8519
8520 src.readSrc();
8521
8522 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8523 if (wf->execMask(lane)) {
8524 vdst[lane] = std::sqrt(src[lane]);
8525 }
8526 }
8527
8528 vdst.write();
8529 }
8530
8531 Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
8532 : Inst_VOP1(iFmt, "v_sqrt_f64")
8533 {
8534 setFlag(ALU);
8535 setFlag(F64);
8536 } // Inst_VOP1__V_SQRT_F64
8537
8538 Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
8539 {
8540 } // ~Inst_VOP1__V_SQRT_F64
8541
8542 // D.d = sqrt(S0.d).
8543 void
8544 Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
8545 {
8546 Wavefront *wf = gpuDynInst->wavefront();
8547 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8548 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8549
8550 src.readSrc();
8551
8552 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8553 if (wf->execMask(lane)) {
8554 vdst[lane] = std::sqrt(src[lane]);
8555 }
8556 }
8557
8558 vdst.write();
8559 }
8560
8561 Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
8562 : Inst_VOP1(iFmt, "v_sin_f32")
8563 {
8564 setFlag(ALU);
8565 setFlag(F32);
8566 } // Inst_VOP1__V_SIN_F32
8567
8568 Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
8569 {
8570 } // ~Inst_VOP1__V_SIN_F32
8571
8572 // D.f = sin(S0.f * 2 * PI).
8573 void
8574 Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
8575 {
8576 Wavefront *wf = gpuDynInst->wavefront();
8577 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8578 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
8579 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8580
8581 src.readSrc();
8582 pi.read();
8583
8584 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8585 if (wf->execMask(lane)) {
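                // The valid input domain is [-256, +256] revolutions;
                // out-of-range inputs produce 0.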
8586 if (src[lane] < -256.0 || src[lane] > 256.0) {
8587 vdst[lane] = 0.0;
8588 } else {
8589 vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
8590 }
8591 }
8592 }
8593
8594 vdst.write();
8595 }
8596
8597 Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
8598 : Inst_VOP1(iFmt, "v_cos_f32")
8599 {
8600 setFlag(ALU);
8601 setFlag(F32);
8602 } // Inst_VOP1__V_COS_F32
8603
8604 Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
8605 {
8606 } // ~Inst_VOP1__V_COS_F32
8607
8608 // D.f = cos(S0.f * 2 * PI).
8609 void
8610 Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
8611 {
8612 Wavefront *wf = gpuDynInst->wavefront();
8613 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8614 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
8615 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8616
8617 src.readSrc();
8618 pi.read();
8619
8620 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8621 if (wf->execMask(lane)) {
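                // The valid input domain is [-256, +256] revolutions;
                // out-of-range inputs produce 0.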
8622 if (src[lane] < -256.0 || src[lane] > 256.0) {
8623 vdst[lane] = 0.0;
8624 } else {
8625 vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
8626 }
8627 }
8628 }
8629
8630 vdst.write();
8631 }
8632
8633 Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
8634 : Inst_VOP1(iFmt, "v_not_b32")
8635 {
8636 setFlag(ALU);
8637 } // Inst_VOP1__V_NOT_B32
8638
8639 Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32()
8640 {
8641 } // ~Inst_VOP1__V_NOT_B32
8642
8643 // D.u = ~S0.u.
8644 // Input and output modifiers not supported.
8645 void
8646 Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
8647 {
8648 Wavefront *wf = gpuDynInst->wavefront();
8649 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8650 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8651
8652 src.readSrc();
8653
8654 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8655 if (wf->execMask(lane)) {
8656 vdst[lane] = ~src[lane];
8657 }
8658 }
8659
8660 vdst.write();
8661 }
8662
8663 Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
8664 : Inst_VOP1(iFmt, "v_bfrev_b32")
8665 {
8666 setFlag(ALU);
8667 } // Inst_VOP1__V_BFREV_B32
8668
8669 Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32()
8670 {
8671 } // ~Inst_VOP1__V_BFREV_B32
8672
8673 // D.u[31:0] = S0.u[0:31], bitfield reverse.
8674 // Input and output modifiers not supported.
8675 void
8676 Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
8677 {
8678 Wavefront *wf = gpuDynInst->wavefront();
8679 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8680 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8681
8682 src.readSrc();
8683
8684 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8685 if (wf->execMask(lane)) {
8686 vdst[lane] = reverseBits(src[lane]);
8687 }
8688 }
8689
8690 vdst.write();
8691 }
8692
8693 Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
8694 : Inst_VOP1(iFmt, "v_ffbh_u32")
8695 {
8696 setFlag(ALU);
8697 } // Inst_VOP1__V_FFBH_U32
8698
8699 Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
8700 {
8701 } // ~Inst_VOP1__V_FFBH_U32
8702
8703 // D.u = position of first 1 in S0.u from MSB;
8704 // D.u = 0xffffffff if S0.u == 0.
8705 void
8706 Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
8707 {
8708 Wavefront *wf = gpuDynInst->wavefront();
8709 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8710 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8711
8712 src.readSrc();
8713
8714 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8715 if (wf->execMask(lane)) {
8716 vdst[lane] = findFirstOneMsb(src[lane]);
8717 }
8718 }
8719
8720 vdst.write();
8721 }
8722
8723 Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
8724 : Inst_VOP1(iFmt, "v_ffbl_b32")
8725 {
8726 setFlag(ALU);
8727 } // Inst_VOP1__V_FFBL_B32
8728
8729 Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
8730 {
8731 } // ~Inst_VOP1__V_FFBL_B32
8732
8733 // D.u = position of first 1 in S0.u from LSB;
8734 // D.u = 0xffffffff if S0.u == 0.
8735 void
8736 Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
8737 {
8738 Wavefront *wf = gpuDynInst->wavefront();
8739 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8740 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8741
8742 src.readSrc();
8743
8744 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8745 if (wf->execMask(lane)) {
8746 vdst[lane] = findFirstOne(src[lane]);
8747 }
8748 }
8749
8750 vdst.write();
8751 }
8752
8753 Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
8754 : Inst_VOP1(iFmt, "v_ffbh_i32")
8755 {
8756 setFlag(ALU);
8757 } // Inst_VOP1__V_FFBH_I32
8758
8759 Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
8760 {
8761 } // ~Inst_VOP1__V_FFBH_I32
8762
8763 // D.u = position of first bit different from sign bit in S0.i from MSB;
8764 // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
8765 void
8766 Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
8767 {
8768 Wavefront *wf = gpuDynInst->wavefront();
8769 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
8770 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8771
8772 src.readSrc();
8773
8774 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8775 if (wf->execMask(lane)) {
8776 vdst[lane] = firstOppositeSignBit(src[lane]);
8777 }
8778 }
8779
8780 vdst.write();
8781 }
8782
8783 Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
8784 InFmt_VOP1 *iFmt)
8785 : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
8786 {
8787 setFlag(ALU);
8788 setFlag(F64);
8789 } // Inst_VOP1__V_FREXP_EXP_I32_F64
8790
8791 Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
8792 {
8793 } // ~Inst_VOP1__V_FREXP_EXP_I32_F64
8794
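    // frexp(S0.d, Exponent(S0.d))
    // if (S0.d == INF || S0.d == NAN) then D.i = 0;
    // else D.i = Exponent(S0.d);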
8795 void
8796 Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
8797 {
8798 Wavefront *wf = gpuDynInst->wavefront();
8799 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8800 VecOperandI32 vdst(gpuDynInst, instData.VDST);
8801
8802 src.readSrc();
8803
8804 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8805 if (wf->execMask(lane)) {
8806 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8807 vdst[lane] = 0;
8808 } else {
8809 VecElemI32 exp = 0;
8810 std::frexp(src[lane], &exp);
8811 vdst[lane] = exp;
8812 }
8813 }
8814 }
8815
8816 vdst.write();
8817 }
8818
8819 Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
8820 : Inst_VOP1(iFmt, "v_frexp_mant_f64")
8821 {
8822 setFlag(ALU);
8823 setFlag(F64);
8824 } // Inst_VOP1__V_FREXP_MANT_F64
8825
8826 Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
8827 {
8828 } // ~Inst_VOP1__V_FREXP_MANT_F64
8829
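    // if (S0.d == INF || S0.d == NAN) then D.d = S0.d;
    // else D.d = frexp(S0.d, Exponent(S0.d)).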
8830 void
8831 Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
8832 {
8833 Wavefront *wf = gpuDynInst->wavefront();
8834 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8835 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8836
8837 src.readSrc();
8838
8839 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8840 if (wf->execMask(lane)) {
8841 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8842 vdst[lane] = src[lane];
8843 } else {
8844 VecElemI32 exp(0);
8845 vdst[lane] = std::frexp(src[lane], &exp);
8846 }
8847 }
8848 }
8849
8850 vdst.write();
8851 }
8852
8853 Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
8854 : Inst_VOP1(iFmt, "v_fract_f64")
8855 {
8856 setFlag(ALU);
8857 setFlag(F64);
8858 } // Inst_VOP1__V_FRACT_F64
8859
8860 Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
8861 {
8862 } // ~Inst_VOP1__V_FRACT_F64
8863
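    // D.d = modf(S0.d).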
8864 void
8865 Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
8866 {
8867 Wavefront *wf = gpuDynInst->wavefront();
8868 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8869 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8870
8871 src.readSrc();
8872
8873 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8874 if (wf->execMask(lane)) {
8875 VecElemF64 int_part(0.0);
8876 vdst[lane] = std::modf(src[lane], &int_part);
8877 }
8878 }
8879
8880 vdst.write();
8881 }
8882
8883 Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
8884 InFmt_VOP1 *iFmt)
8885 : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
8886 {
8887 setFlag(ALU);
8888 setFlag(F32);
8889 } // Inst_VOP1__V_FREXP_EXP_I32_F32
8890
8891 Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
8892 {
8893 } // ~Inst_VOP1__V_FREXP_EXP_I32_F32
8894
8895 // frexp(S0.f, Exponent(S0.f))
8896 // if (S0.f == INF || S0.f == NAN) then D.i = 0;
8897 // else D.i = Exponent(S0.f);
8898 void
8899 Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
8900 {
8901 Wavefront *wf = gpuDynInst->wavefront();
8902 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8903 VecOperandI32 vdst(gpuDynInst, instData.VDST);
8904
8905 src.readSrc();
8906
8907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8908 if (wf->execMask(lane)) {
8909 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8910 vdst[lane] = 0;
8911 } else {
8912 VecElemI32 exp(0);
8913 std::frexp(src[lane], &exp);
8914 vdst[lane] = exp;
8915 }
8916 }
8917 }
8918
8919 vdst.write();
8920 }
8921
8922 Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
8923 : Inst_VOP1(iFmt, "v_frexp_mant_f32")
8924 {
8925 setFlag(ALU);
8926 setFlag(F32);
8927 } // Inst_VOP1__V_FREXP_MANT_F32
8928
8929 Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
8930 {
8931 } // ~Inst_VOP1__V_FREXP_MANT_F32
8932
8933 // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
8934 // else D.f = frexp(S0.f, Exponent(S0.f)).
8935 void
8936 Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
8937 {
8938 Wavefront *wf = gpuDynInst->wavefront();
8939 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8940 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8941
8942 src.readSrc();
8943
8944 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8945 if (wf->execMask(lane)) {
8946 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8947 vdst[lane] = src[lane];
8948 } else {
8949 VecElemI32 exp(0);
8950 vdst[lane] = std::frexp(src[lane], &exp);
8951 }
8952 }
8953 }
8954
8955 vdst.write();
8956 }
8957
8958 Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
8959 : Inst_VOP1(iFmt, "v_clrexcp")
8960 {
8961 setFlag(ALU);
8962 } // Inst_VOP1__V_CLREXCP
8963
8964 Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
8965 {
8966 } // ~Inst_VOP1__V_CLREXCP
8967
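    // Clear the wave's floating-point exception state.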
8968 void
8969 Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
8970 {
8971 panicUnimplemented();
8972 }
8973
8974 Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
8975 : Inst_VOP1(iFmt, "v_cvt_f16_u16")
8976 {
8977 setFlag(ALU);
8978 setFlag(F16);
8979 } // Inst_VOP1__V_CVT_F16_U16
8980
8981 Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
8982 {
8983 } // ~Inst_VOP1__V_CVT_F16_U16
8984
8985 // D.f16 = uint16_to_flt16(S.u16).
8986 void
8987 Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
8988 {
8989 panicUnimplemented();
8990 }
8991
8992 Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
8993 : Inst_VOP1(iFmt, "v_cvt_f16_i16")
8994 {
8995 setFlag(ALU);
8996 setFlag(F16);
8997 } // Inst_VOP1__V_CVT_F16_I16
8998
8999 Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
9000 {
9001 } // ~Inst_VOP1__V_CVT_F16_I16
9002
9003 // D.f16 = int16_to_flt16(S.i16).
9004 void
9005 Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
9006 {
9007 panicUnimplemented();
9008 }
9009
9010 Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
9011 : Inst_VOP1(iFmt, "v_cvt_u16_f16")
9012 {
9013 setFlag(ALU);
9014 setFlag(F16);
9015 } // Inst_VOP1__V_CVT_U16_F16
9016
9017 Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
9018 {
9019 } // ~Inst_VOP1__V_CVT_U16_F16
9020
9021 // D.u16 = flt16_to_uint16(S.f16).
9022 void
9023 Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
9024 {
9025 panicUnimplemented();
9026 }
9027
9028 Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
9029 : Inst_VOP1(iFmt, "v_cvt_i16_f16")
9030 {
9031 setFlag(ALU);
9032 setFlag(F16);
9033 } // Inst_VOP1__V_CVT_I16_F16
9034
9035 Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
9036 {
9037 } // ~Inst_VOP1__V_CVT_I16_F16
9038
9039 // D.i16 = flt16_to_int16(S.f16).
9040 void
9041 Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
9042 {
9043 panicUnimplemented();
9044 }
9045
9046 Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
9047 : Inst_VOP1(iFmt, "v_rcp_f16")
9048 {
9049 setFlag(ALU);
9050 setFlag(F16);
9051 } // Inst_VOP1__V_RCP_F16
9052
9053 Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
9054 {
9055 } // ~Inst_VOP1__V_RCP_F16
9056
9057 // if (S0.f16 == 1.0f)
9058 // D.f16 = 1.0f;
9059 // else
9060 // D.f16 = 1 / S0.f16;
9061 void
9062 Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
9063 {
9064 panicUnimplemented();
9065 }
9066
9067 Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)
9068 : Inst_VOP1(iFmt, "v_sqrt_f16")
9069 {
9070 setFlag(ALU);
9071 setFlag(F16);
9072 } // Inst_VOP1__V_SQRT_F16
9073
9074 Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16()
9075 {
9076 } // ~Inst_VOP1__V_SQRT_F16
9077
9078 // if (S0.f16 == 1.0f)
9079 // D.f16 = 1.0f;
9080 // else
9081 // D.f16 = sqrt(S0.f16);
9082 void
9083 Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
9084 {
9085 panicUnimplemented();
9086 }
9087
9088 Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)
9089 : Inst_VOP1(iFmt, "v_rsq_f16")
9090 {
9091 setFlag(ALU);
9092 setFlag(F16);
9093 } // Inst_VOP1__V_RSQ_F16
9094
9095 Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16()
9096 {
9097 } // ~Inst_VOP1__V_RSQ_F16
9098
9099 // if (S0.f16 == 1.0f)
9100 // D.f16 = 1.0f;
9101 // else
9102 // D.f16 = 1 / sqrt(S0.f16);
9103 void
9104 Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
9105 {
9106 panicUnimplemented();
9107 }
9108
9109 Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)
9110 : Inst_VOP1(iFmt, "v_log_f16")
9111 {
9112 setFlag(ALU);
9113 setFlag(F16);
9114 } // Inst_VOP1__V_LOG_F16
9115
9116 Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16()
9117 {
9118 } // ~Inst_VOP1__V_LOG_F16
9119
9120 // if (S0.f16 == 1.0f)
9121 // D.f16 = 0.0f;
9122 // else
9123 // D.f16 = log2(S0.f16);
9124 void
9125 Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
9126 {
9127 panicUnimplemented();
9128 }
9129
9130 Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)
9131 : Inst_VOP1(iFmt, "v_exp_f16")
9132 {
9133 setFlag(ALU);
9134 setFlag(F16);
9135 } // Inst_VOP1__V_EXP_F16
9136
9137 Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16()
9138 {
9139 } // ~Inst_VOP1__V_EXP_F16
9140
9141 // if (S0.f16 == 0.0f)
9142 // D.f16 = 1.0f;
9143 // else
9144 // D.f16 = pow(2.0, S0.f16).
9145 void
9146 Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
9147 {
9148 panicUnimplemented();
9149 }
9150
9151 Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)
9152 : Inst_VOP1(iFmt, "v_frexp_mant_f16")
9153 {
9154 setFlag(ALU);
9155 setFlag(F16);
9156 } // Inst_VOP1__V_FREXP_MANT_F16
9157
9158 Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16()
9159 {
9160 } // ~Inst_VOP1__V_FREXP_MANT_F16
9161
9162 // if (S0.f16 == +-INF || S0.f16 == NAN)
9163 // D.f16 = S0.f16;
9164 // else
9165 // D.f16 = mantissa(S0.f16).
9166 void
9167 Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
9168 {
9169 panicUnimplemented();
9170 }
9171
9172 Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(
9173 InFmt_VOP1 *iFmt)
9174 : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
9175 {
9176 setFlag(ALU);
9177 setFlag(F16);
9178 } // Inst_VOP1__V_FREXP_EXP_I16_F16
9179
9180 Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16()
9181 {
9182 } // ~Inst_VOP1__V_FREXP_EXP_I16_F16
9183
9184 // frexp(S0.f16, Exponent(S0.f16))
9185 // if (S0.f16 == +-INF || S0.f16 == NAN)
9186 // D.i16 = 0;
9187 // else
9188 // D.i16 = Exponent(S0.f16);
9189 void
9190 Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
9191 {
9192 panicUnimplemented();
9193 }
9194
9195 Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)
9196 : Inst_VOP1(iFmt, "v_floor_f16")
9197 {
9198 setFlag(ALU);
9199 setFlag(F16);
9200 } // Inst_VOP1__V_FLOOR_F16
9201
9202 Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16()
9203 {
9204 } // ~Inst_VOP1__V_FLOOR_F16
9205
9206 // D.f16 = floor(S0.f16);
9207 void
9208 Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
9209 {
9210 panicUnimplemented();
9211 }
9212
9213 Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)
9214 : Inst_VOP1(iFmt, "v_ceil_f16")
9215 {
9216 setFlag(ALU);
9217 setFlag(F16);
9218 } // Inst_VOP1__V_CEIL_F16
9219
9220 Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16()
9221 {
9222 } // ~Inst_VOP1__V_CEIL_F16
9223
9224 // D.f16 = ceil(S0.f16);
9225 void
9226 Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
9227 {
9228 panicUnimplemented();
9229 }
9230
9231 Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)
9232 : Inst_VOP1(iFmt, "v_trunc_f16")
9233 {
9234 setFlag(ALU);
9235 setFlag(F16);
9236 } // Inst_VOP1__V_TRUNC_F16
9237
9238 Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16()
9239 {
9240 } // ~Inst_VOP1__V_TRUNC_F16
9241
9242 // D.f16 = trunc(S0.f16).
9243 void
9244 Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
9245 {
9246 panicUnimplemented();
9247 }
9248
9249 Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)
9250 : Inst_VOP1(iFmt, "v_rndne_f16")
9251 {
9252 setFlag(ALU);
9253 setFlag(F16);
9254 } // Inst_VOP1__V_RNDNE_F16
9255
9256 Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16()
9257 {
9258 } // ~Inst_VOP1__V_RNDNE_F16
9259
9260 // D.f16 = roundNearestEven(S0.f16);
9261 void
9262 Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
9263 {
9264 panicUnimplemented();
9265 }
9266
9267 Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)
9268 : Inst_VOP1(iFmt, "v_fract_f16")
9269 {
9270 setFlag(ALU);
9271 setFlag(F16);
9272 } // Inst_VOP1__V_FRACT_F16
9273
9274 Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16()
9275 {
9276 } // ~Inst_VOP1__V_FRACT_F16
9277
9278 // D.f16 = S0.f16 + -floor(S0.f16).
9279 void
9280 Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
9281 {
9282 panicUnimplemented();
9283 }
9284
9285 Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)
9286 : Inst_VOP1(iFmt, "v_sin_f16")
9287 {
9288 setFlag(ALU);
9289 setFlag(F16);
9290 } // Inst_VOP1__V_SIN_F16
9291
9292 Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16()
9293 {
9294 } // ~Inst_VOP1__V_SIN_F16
9295
9296 // D.f16 = sin(S0.f16 * 2 * PI).
9297 void
9298 Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
9299 {
9300 panicUnimplemented();
9301 }
9302
9303 Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)
9304 : Inst_VOP1(iFmt, "v_cos_f16")
9305 {
9306 setFlag(ALU);
9307 setFlag(F16);
9308 } // Inst_VOP1__V_COS_F16
9309
9310 Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16()
9311 {
9312 } // ~Inst_VOP1__V_COS_F16
9313
9314 // D.f16 = cos(S0.f16 * 2 * PI).
9315 void
9316 Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
9317 {
9318 panicUnimplemented();
9319 }
9320
9321 Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)
9322 : Inst_VOP1(iFmt, "v_exp_legacy_f32")
9323 {
9324 setFlag(ALU);
9325 setFlag(F32);
9326 } // Inst_VOP1__V_EXP_LEGACY_F32
9327
9328 Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32()
9329 {
9330 } // ~Inst_VOP1__V_EXP_LEGACY_F32
9331
9332 // D.f = pow(2.0, S0.f)
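    // The legacy variant exists for compatibility with older hardware;
    // any difference from v_exp_f32 in edge-case handling is not
    // modeled, so the implementation is identical.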
9333 void
9334 Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
9335 {
9336 Wavefront *wf = gpuDynInst->wavefront();
9337 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
9338 VecOperandF32 vdst(gpuDynInst, instData.VDST);
9339
9340 src.readSrc();
9341
9342 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9343 if (wf->execMask(lane)) {
9344 vdst[lane] = std::pow(2.0, src[lane]);
9345 }
9346 }
9347
9348 vdst.write();
9349 }
9350
9351 Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
9352 : Inst_VOP1(iFmt, "v_log_legacy_f32")
9353 {
9354 setFlag(ALU);
9355 setFlag(F32);
9356 } // Inst_VOP1__V_LOG_LEGACY_F32
9357
9358 Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
9359 {
9360 } // ~Inst_VOP1__V_LOG_LEGACY_F32
9361
9362 // D.f = log2(S0.f).
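    // As with v_exp_legacy_f32, legacy edge-case behavior is not
    // modeled; the implementation matches v_log_f32.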
9363 void
9364 Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
9365 {
9366 Wavefront *wf = gpuDynInst->wavefront();
9367 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
9368 VecOperandF32 vdst(gpuDynInst, instData.VDST);
9369
9370 src.readSrc();
9371
9372 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9373 if (wf->execMask(lane)) {
9374 vdst[lane] = std::log2(src[lane]);
9375 }
9376 }
9377
9378 vdst.write();
9379 }
9380
9381 Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt)
9382 : Inst_VOPC(iFmt, "v_cmp_class_f32")
9383 {
9384 setFlag(ALU);
9385 setFlag(F32);
9386 } // Inst_VOPC__V_CMP_CLASS_F32
9387
9388 Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32()
9389 {
9390 } // ~Inst_VOPC__V_CMP_CLASS_F32
9391
9392 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
9393 // The function reports true if the floating point value is any of the
9394 // numeric types selected in S1.u according to the following list:
9395 // S1.u[0] -- value is a signaling NaN.
9396 // S1.u[1] -- value is a quiet NaN.
9397 // S1.u[2] -- value is negative infinity.
9398 // S1.u[3] -- value is a negative normal value.
9399 // S1.u[4] -- value is a negative denormal value.
9400 // S1.u[5] -- value is negative zero.
9401 // S1.u[6] -- value is positive zero.
9402 // S1.u[7] -- value is a positive denormal value.
9403 // S1.u[8] -- value is a positive normal value.
9404 // S1.u[9] -- value is positive infinity.
9405 void
9406 Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
9407 {
9408 Wavefront *wf = gpuDynInst->wavefront();
9409 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
9410 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9412
9413 src0.readSrc();
9414 src1.read();
9415
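        // For each active lane, set that lane's VCC bit when S0 belongs
        // to any of the numeric classes selected by the mask in S1.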
9416 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9417 if (wf->execMask(lane)) {
9418 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9419 // is NaN
9420 if (std::isnan(src0[lane])) {
9421 vcc.setBit(lane, 1);
9422 continue;
9423 }
9424 }
9425 if (bits(src1[lane], 2)) {
9426 // is -infinity
9427 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9428 vcc.setBit(lane, 1);
9429 continue;
9430 }
9431 }
9432 if (bits(src1[lane], 3)) {
9433 // is -normal
9434 if (std::isnormal(src0[lane])
9435 && std::signbit(src0[lane])) {
9436 vcc.setBit(lane, 1);
9437 continue;
9438 }
9439 }
9440 if (bits(src1[lane], 4)) {
9441 // is -denormal
9442 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9443 && std::signbit(src0[lane])) {
9444 vcc.setBit(lane, 1);
9445 continue;
9446 }
9447 }
9448 if (bits(src1[lane], 5)) {
9449 // is -zero
9450 if (std::fpclassify(src0[lane]) == FP_ZERO
9451 && std::signbit(src0[lane])) {
9452 vcc.setBit(lane, 1);
9453 continue;
9454 }
9455 }
9456 if (bits(src1[lane], 6)) {
9457 // is +zero
9458 if (std::fpclassify(src0[lane]) == FP_ZERO
9459 && !std::signbit(src0[lane])) {
9460 vcc.setBit(lane, 1);
9461 continue;
9462 }
9463 }
9464 if (bits(src1[lane], 7)) {
9465 // is +denormal
9466 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9467 && !std::signbit(src0[lane])) {
9468 vcc.setBit(lane, 1);
9469 continue;
9470 }
9471 }
9472 if (bits(src1[lane], 8)) {
9473 // is +normal
9474 if (std::isnormal(src0[lane])
9475 && !std::signbit(src0[lane])) {
9476 vcc.setBit(lane, 1);
9477 continue;
9478 }
9479 }
9480 if (bits(src1[lane], 9)) {
9481 // is +infinity
9482 if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
9483 vcc.setBit(lane, 1);
9484 continue;
9485 }
9486 }
9487 }
9488 }
9489
9490 vcc.write();
9491 }
9492
9493 Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt)
9494 : Inst_VOPC(iFmt, "v_cmpx_class_f32")
9495 {
9496 setFlag(ALU);
9497 setFlag(F32);
9498 } // Inst_VOPC__V_CMPX_CLASS_F32
9499
9500 Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32()
9501 {
9502 } // ~Inst_VOPC__V_CMPX_CLASS_F32
9503
9504 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f. The function reports true if the floating point value is any of
9506 // the numeric types selected in S1.u according to the following list:
9507 // S1.u[0] -- value is a signaling NaN.
9508 // S1.u[1] -- value is a quiet NaN.
9509 // S1.u[2] -- value is negative infinity.
9510 // S1.u[3] -- value is a negative normal value.
9511 // S1.u[4] -- value is a negative denormal value.
9512 // S1.u[5] -- value is negative zero.
9513 // S1.u[6] -- value is positive zero.
9514 // S1.u[7] -- value is a positive denormal value.
9515 // S1.u[8] -- value is a positive normal value.
9516 // S1.u[9] -- value is positive infinity.
9517 void
9518 Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
9519 {
9520 Wavefront *wf = gpuDynInst->wavefront();
9521 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
9522 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9524
9525 src0.readSrc();
9526 src1.read();
9527
9528 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9529 if (wf->execMask(lane)) {
9530 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9531 // is NaN
9532 if (std::isnan(src0[lane])) {
9533 vcc.setBit(lane, 1);
9534 continue;
9535 }
9536 }
9537 if (bits(src1[lane], 2)) {
9538 // is -infinity
9539 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9540 vcc.setBit(lane, 1);
9541 continue;
9542 }
9543 }
9544 if (bits(src1[lane], 3)) {
9545 // is -normal
9546 if (std::isnormal(src0[lane])
9547 && std::signbit(src0[lane])) {
9548 vcc.setBit(lane, 1);
9549 continue;
9550 }
9551 }
9552 if (bits(src1[lane], 4)) {
9553 // is -denormal
9554 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9555 && std::signbit(src0[lane])) {
9556 vcc.setBit(lane, 1);
9557 continue;
9558 }
9559 }
9560 if (bits(src1[lane], 5)) {
9561 // is -zero
9562 if (std::fpclassify(src0[lane]) == FP_ZERO
9563 && std::signbit(src0[lane])) {
9564 vcc.setBit(lane, 1);
9565 continue;
9566 }
9567 }
9568 if (bits(src1[lane], 6)) {
9569 // is +zero
9570 if (std::fpclassify(src0[lane]) == FP_ZERO
9571 && !std::signbit(src0[lane])) {
9572 vcc.setBit(lane, 1);
9573 continue;
9574 }
9575 }
9576 if (bits(src1[lane], 7)) {
9577 // is +denormal
9578 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9579 && !std::signbit(src0[lane])) {
9580 vcc.setBit(lane, 1);
9581 continue;
9582 }
9583 }
9584 if (bits(src1[lane], 8)) {
9585 // is +normal
9586 if (std::isnormal(src0[lane])
9587 && !std::signbit(src0[lane])) {
9588 vcc.setBit(lane, 1);
9589 continue;
9590 }
9591 }
9592 if (bits(src1[lane], 9)) {
9593 // is +infinity
9594 if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
9595 vcc.setBit(lane, 1);
9596 continue;
9597 }
9598 }
9599 }
9600 }
9601
9602 vcc.write();
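        // The CMPX variant also copies the comparison result into EXEC.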
9603 wf->execMask() = vcc.rawData();
9604 }
9605
9606 Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt)
9607 : Inst_VOPC(iFmt, "v_cmp_class_f64")
9608 {
9609 setFlag(ALU);
9610 setFlag(F64);
9611 } // Inst_VOPC__V_CMP_CLASS_F64
9612
9613 Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64()
9614 {
9615 } // ~Inst_VOPC__V_CMP_CLASS_F64
9616
9617 // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
9618 // The function reports true if the floating point value is any of the
9619 // numeric types selected in S1.u according to the following list:
9620 // S1.u[0] -- value is a signaling NaN.
9621 // S1.u[1] -- value is a quiet NaN.
9622 // S1.u[2] -- value is negative infinity.
9623 // S1.u[3] -- value is a negative normal value.
9624 // S1.u[4] -- value is a negative denormal value.
9625 // S1.u[5] -- value is negative zero.
9626 // S1.u[6] -- value is positive zero.
9627 // S1.u[7] -- value is a positive denormal value.
9628 // S1.u[8] -- value is a positive normal value.
9629 // S1.u[9] -- value is positive infinity.
9630 void
9631 Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
9632 {
9633 Wavefront *wf = gpuDynInst->wavefront();
9634 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
9635 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9637
9638 src0.readSrc();
9639 src1.read();
9640
9641 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9642 if (wf->execMask(lane)) {
9643 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9644 // is NaN
9645 if (std::isnan(src0[lane])) {
9646 vcc.setBit(lane, 1);
9647 continue;
9648 }
9649 }
9650 if (bits(src1[lane], 2)) {
9651 // is -infinity
9652 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9653 vcc.setBit(lane, 1);
9654 continue;
9655 }
9656 }
9657 if (bits(src1[lane], 3)) {
9658 // is -normal
9659 if (std::isnormal(src0[lane])
9660 && std::signbit(src0[lane])) {
9661 vcc.setBit(lane, 1);
9662 continue;
9663 }
9664 }
9665 if (bits(src1[lane], 4)) {
9666 // is -denormal
9667 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9668 && std::signbit(src0[lane])) {
9669 vcc.setBit(lane, 1);
9670 continue;
9671 }
9672 }
9673 if (bits(src1[lane], 5)) {
9674 // is -zero
9675 if (std::fpclassify(src0[lane]) == FP_ZERO
9676 && std::signbit(src0[lane])) {
9677 vcc.setBit(lane, 1);
9678 continue;
9679 }
9680 }
9681 if (bits(src1[lane], 6)) {
9682 // is +zero
9683 if (std::fpclassify(src0[lane]) == FP_ZERO
9684 && !std::signbit(src0[lane])) {
9685 vcc.setBit(lane, 1);
9686 continue;
9687 }
9688 }
9689 if (bits(src1[lane], 7)) {
9690 // is +denormal
9691 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9692 && !std::signbit(src0[lane])) {
9693 vcc.setBit(lane, 1);
9694 continue;
9695 }
9696 }
9697 if (bits(src1[lane], 8)) {
9698 // is +normal
9699 if (std::isnormal(src0[lane])
9700 && !std::signbit(src0[lane])) {
9701 vcc.setBit(lane, 1);
9702 continue;
9703 }
9704 }
9705 if (bits(src1[lane], 9)) {
9706 // is +infinity
9707 if (std::isinf(src0[lane])
9708 && !std::signbit(src0[lane])) {
9709 vcc.setBit(lane, 1);
9710 continue;
9711 }
9712 }
9713 }
9714 }
9715
9716 vcc.write();
9717 }
9718
9719 Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt)
9720 : Inst_VOPC(iFmt, "v_cmpx_class_f64")
9721 {
9722 setFlag(ALU);
9723 setFlag(F64);
9724 } // Inst_VOPC__V_CMPX_CLASS_F64
9725
9726 Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64()
9727 {
9728 } // ~Inst_VOPC__V_CMPX_CLASS_F64
9729
9730 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.d. The function reports true if the floating point value is any of
9732 // the numeric types selected in S1.u according to the following list:
9733 // S1.u[0] -- value is a signaling NaN.
9734 // S1.u[1] -- value is a quiet NaN.
9735 // S1.u[2] -- value is negative infinity.
9736 // S1.u[3] -- value is a negative normal value.
9737 // S1.u[4] -- value is a negative denormal value.
9738 // S1.u[5] -- value is negative zero.
9739 // S1.u[6] -- value is positive zero.
9740 // S1.u[7] -- value is a positive denormal value.
9741 // S1.u[8] -- value is a positive normal value.
9742 // S1.u[9] -- value is positive infinity.
9743 void
9744 Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
9745 {
9746 Wavefront *wf = gpuDynInst->wavefront();
9747 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
9748 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9750
9751 src0.readSrc();
9752 src1.read();
9753
9754 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9755 if (wf->execMask(lane)) {
9756 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9757 // is NaN
9758 if (std::isnan(src0[lane])) {
9759 vcc.setBit(lane, 1);
9760 continue;
9761 }
9762 }
9763 if (bits(src1[lane], 2)) {
9764 // is -infinity
9765 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9766 vcc.setBit(lane, 1);
9767 continue;
9768 }
9769 }
9770 if (bits(src1[lane], 3)) {
9771 // is -normal
9772 if (std::isnormal(src0[lane])
9773 && std::signbit(src0[lane])) {
9774 vcc.setBit(lane, 1);
9775 continue;
9776 }
9777 }
9778 if (bits(src1[lane], 4)) {
9779 // is -denormal
9780 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9781 && std::signbit(src0[lane])) {
9782 vcc.setBit(lane, 1);
9783 continue;
9784 }
9785 }
9786 if (bits(src1[lane], 5)) {
9787 // is -zero
9788 if (std::fpclassify(src0[lane]) == FP_ZERO
9789 && std::signbit(src0[lane])) {
9790 vcc.setBit(lane, 1);
9791 continue;
9792 }
9793 }
9794 if (bits(src1[lane], 6)) {
9795 // is +zero
9796 if (std::fpclassify(src0[lane]) == FP_ZERO
9797 && !std::signbit(src0[lane])) {
9798 vcc.setBit(lane, 1);
9799 continue;
9800 }
9801 }
9802 if (bits(src1[lane], 7)) {
9803 // is +denormal
9804 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9805 && !std::signbit(src0[lane])) {
9806 vcc.setBit(lane, 1);
9807 continue;
9808 }
9809 }
9810 if (bits(src1[lane], 8)) {
9811 // is +normal
9812 if (std::isnormal(src0[lane])
9813 && !std::signbit(src0[lane])) {
9814 vcc.setBit(lane, 1);
9815 continue;
9816 }
9817 }
9818 if (bits(src1[lane], 9)) {
9819 // is +infinity
9820 if (std::isinf(src0[lane])
9821 && !std::signbit(src0[lane])) {
9822 vcc.setBit(lane, 1);
9823 continue;
9824 }
9825 }
9826 }
9827 }
9828
9829 vcc.write();
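        // As with the F32 variant, CMPX copies the comparison result
        // into EXEC.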
9830 wf->execMask() = vcc.rawData();
9831 }
9832
9833 Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt)
9834 : Inst_VOPC(iFmt, "v_cmp_class_f16")
9835 {
9836 setFlag(ALU);
9837 setFlag(F16);
9838 } // Inst_VOPC__V_CMP_CLASS_F16
9839
9840 Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16()
9841 {
9842 } // ~Inst_VOPC__V_CMP_CLASS_F16
9843
9844 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
9845 // The function reports true if the floating point value is any of the
9846 // numeric types selected in S1.u according to the following list:
9847 // S1.u[0] -- value is a signaling NaN.
9848 // S1.u[1] -- value is a quiet NaN.
9849 // S1.u[2] -- value is negative infinity.
9850 // S1.u[3] -- value is a negative normal value.
9851 // S1.u[4] -- value is a negative denormal value.
9852 // S1.u[5] -- value is negative zero.
9853 // S1.u[6] -- value is positive zero.
9854 // S1.u[7] -- value is a positive denormal value.
9855 // S1.u[8] -- value is a positive normal value.
9856 // S1.u[9] -- value is positive infinity.
9857 void
9858 Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
9859 {
9860 panicUnimplemented();
9861 }
9862
9863 Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt)
9864 : Inst_VOPC(iFmt, "v_cmpx_class_f16")
9865 {
9866 setFlag(ALU);
9867 setFlag(F16);
9868 } // Inst_VOPC__V_CMPX_CLASS_F16
9869
9870 Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16()
9871 {
9872 } // ~Inst_VOPC__V_CMPX_CLASS_F16
9873
9874 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
9875 // S0.f16
9876 // The function reports true if the floating point value is any of the
9877 // numeric types selected in S1.u according to the following list:
9878 // S1.u[0] -- value is a signaling NaN.
9879 // S1.u[1] -- value is a quiet NaN.
9880 // S1.u[2] -- value is negative infinity.
9881 // S1.u[3] -- value is a negative normal value.
9882 // S1.u[4] -- value is a negative denormal value.
9883 // S1.u[5] -- value is negative zero.
9884 // S1.u[6] -- value is positive zero.
9885 // S1.u[7] -- value is a positive denormal value.
9886 // S1.u[8] -- value is a positive normal value.
9887 // S1.u[9] -- value is positive infinity.
9888 void
9889 Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
9890 {
9891 panicUnimplemented();
9892 }
9893
9894 Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt)
9895 : Inst_VOPC(iFmt, "v_cmp_f_f16")
9896 {
9897 setFlag(ALU);
9898 setFlag(F16);
9899 } // Inst_VOPC__V_CMP_F_F16
9900
9901 Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16()
9902 {
9903 } // ~Inst_VOPC__V_CMP_F_F16
9904
9905 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
9906 void
9907 Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
9908 {
9909 panicUnimplemented();
9910 }
9911
9912 Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt)
9913 : Inst_VOPC(iFmt, "v_cmp_lt_f16")
9914 {
9915 setFlag(ALU);
9916 setFlag(F16);
9917 } // Inst_VOPC__V_CMP_LT_F16
9918
9919 Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16()
9920 {
9921 } // ~Inst_VOPC__V_CMP_LT_F16
9922
9923 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
9924 void
9925 Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
9926 {
9927 panicUnimplemented();
9928 }
9929
9930 Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt)
9931 : Inst_VOPC(iFmt, "v_cmp_eq_f16")
9932 {
9933 setFlag(ALU);
9934 setFlag(F16);
9935 } // Inst_VOPC__V_CMP_EQ_F16
9936
9937 Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16()
9938 {
9939 } // ~Inst_VOPC__V_CMP_EQ_F16
9940
9941 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
9942 void
9943 Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
9944 {
9945 panicUnimplemented();
9946 }
9947
9948 Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt)
9949 : Inst_VOPC(iFmt, "v_cmp_le_f16")
9950 {
9951 setFlag(ALU);
9952 setFlag(F16);
9953 } // Inst_VOPC__V_CMP_LE_F16
9954
9955 Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16()
9956 {
9957 } // ~Inst_VOPC__V_CMP_LE_F16
9958
9959 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
9960 void
9961 Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
9962 {
9963 panicUnimplemented();
9964 }
9965
9966 Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt)
9967 : Inst_VOPC(iFmt, "v_cmp_gt_f16")
9968 {
9969 setFlag(ALU);
9970 setFlag(F16);
9971 } // Inst_VOPC__V_CMP_GT_F16
9972
9973 Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16()
9974 {
9975 } // ~Inst_VOPC__V_CMP_GT_F16
9976
9977 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
9978 void
9979 Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
9980 {
9981 panicUnimplemented();
9982 }
9983
9984 Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt)
9985 : Inst_VOPC(iFmt, "v_cmp_lg_f16")
9986 {
9987 setFlag(ALU);
9988 setFlag(F16);
9989 } // Inst_VOPC__V_CMP_LG_F16
9990
9991 Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16()
9992 {
9993 } // ~Inst_VOPC__V_CMP_LG_F16
9994
9995 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
9996 void
9997 Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
9998 {
9999 panicUnimplemented();
10000 }
10001
10002 Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt)
10003 : Inst_VOPC(iFmt, "v_cmp_ge_f16")
10004 {
10005 setFlag(ALU);
10006 setFlag(F16);
10007 } // Inst_VOPC__V_CMP_GE_F16
10008
10009 Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16()
10010 {
10011 } // ~Inst_VOPC__V_CMP_GE_F16
10012
10013 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
10014 void
10015 Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
10016 {
10017 panicUnimplemented();
10018 }
10019
10020 Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt)
10021 : Inst_VOPC(iFmt, "v_cmp_o_f16")
10022 {
10023 setFlag(ALU);
10024 setFlag(F16);
10025 } // Inst_VOPC__V_CMP_O_F16
10026
10027 Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16()
10028 {
10029 } // ~Inst_VOPC__V_CMP_O_F16
10030
10031 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
10032 void
10033 Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
10034 {
10035 panicUnimplemented();
10036 }
10037
10038 Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt)
10039 : Inst_VOPC(iFmt, "v_cmp_u_f16")
10040 {
10041 setFlag(ALU);
10042 setFlag(F16);
10043 } // Inst_VOPC__V_CMP_U_F16
10044
10045 Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16()
10046 {
10047 } // ~Inst_VOPC__V_CMP_U_F16
10048
10049 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
10050 void
10051 Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
10052 {
10053 panicUnimplemented();
10054 }
10055
10056 Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt)
10057 : Inst_VOPC(iFmt, "v_cmp_nge_f16")
10058 {
10059 setFlag(ALU);
10060 setFlag(F16);
10061 } // Inst_VOPC__V_CMP_NGE_F16
10062
10063 Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16()
10064 {
10065 } // ~Inst_VOPC__V_CMP_NGE_F16
10066
10067 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
10068 void
10069 Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
10070 {
10071 panicUnimplemented();
10072 }
10073
10074 Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt)
10075 : Inst_VOPC(iFmt, "v_cmp_nlg_f16")
10076 {
10077 setFlag(ALU);
10078 setFlag(F16);
10079 } // Inst_VOPC__V_CMP_NLG_F16
10080
10081 Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16()
10082 {
10083 } // ~Inst_VOPC__V_CMP_NLG_F16
10084
10085 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
10086 void
10087 Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
10088 {
10089 panicUnimplemented();
10090 }
10091
10092 Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt)
10093 : Inst_VOPC(iFmt, "v_cmp_ngt_f16")
10094 {
10095 setFlag(ALU);
10096 setFlag(F16);
10097 } // Inst_VOPC__V_CMP_NGT_F16
10098
10099 Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16()
10100 {
10101 } // ~Inst_VOPC__V_CMP_NGT_F16
10102
10103 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
10104 void
10105 Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
10106 {
10107 panicUnimplemented();
10108 }
10109
10110 Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt)
10111 : Inst_VOPC(iFmt, "v_cmp_nle_f16")
10112 {
10113 setFlag(ALU);
10114 setFlag(F16);
10115 } // Inst_VOPC__V_CMP_NLE_F16
10116
10117 Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16()
10118 {
10119 } // ~Inst_VOPC__V_CMP_NLE_F16
10120
10121 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
10122 void
10123 Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
10124 {
10125 panicUnimplemented();
10126 }
10127
10128 Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt)
10129 : Inst_VOPC(iFmt, "v_cmp_neq_f16")
10130 {
10131 setFlag(ALU);
10132 setFlag(F16);
10133 } // Inst_VOPC__V_CMP_NEQ_F16
10134
10135 Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16()
10136 {
10137 } // ~Inst_VOPC__V_CMP_NEQ_F16
10138
10139 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
10140 void
10141 Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
10142 {
10143 panicUnimplemented();
10144 }
10145
10146 Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt)
10147 : Inst_VOPC(iFmt, "v_cmp_nlt_f16")
10148 {
10149 setFlag(ALU);
10150 setFlag(F16);
10151 } // Inst_VOPC__V_CMP_NLT_F16
10152
10153 Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16()
10154 {
10155 } // ~Inst_VOPC__V_CMP_NLT_F16
10156
10157 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
10158 void
10159 Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
10160 {
10161 panicUnimplemented();
10162 }
10163
10164 Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt)
10165 : Inst_VOPC(iFmt, "v_cmp_tru_f16")
10166 {
10167 setFlag(ALU);
10168 setFlag(F16);
10169 } // Inst_VOPC__V_CMP_TRU_F16
10170
10171 Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16()
10172 {
10173 } // ~Inst_VOPC__V_CMP_TRU_F16
10174
10175 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
10176 void
10177 Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
10178 {
10179 panicUnimplemented();
10180 }
10181
10182 Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt)
10183 : Inst_VOPC(iFmt, "v_cmpx_f_f16")
10184 {
10185 setFlag(ALU);
10186 setFlag(F16);
10187 } // Inst_VOPC__V_CMPX_F_F16
10188
10189 Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16()
10190 {
10191 } // ~Inst_VOPC__V_CMPX_F_F16
10192
10193 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
10194 void
10195 Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
10196 {
10197 panicUnimplemented();
10198 }
10199
10200 Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt)
10201 : Inst_VOPC(iFmt, "v_cmpx_lt_f16")
10202 {
10203 setFlag(ALU);
10204 setFlag(F16);
10205 } // Inst_VOPC__V_CMPX_LT_F16
10206
10207 Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16()
10208 {
10209 } // ~Inst_VOPC__V_CMPX_LT_F16
10210
10211 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
10212 void
10213 Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
10214 {
10215 panicUnimplemented();
10216 }
10217
10218 Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt)
10219 : Inst_VOPC(iFmt, "v_cmpx_eq_f16")
10220 {
10221 setFlag(ALU);
10222 setFlag(F16);
10223 } // Inst_VOPC__V_CMPX_EQ_F16
10224
10225 Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16()
10226 {
10227 } // ~Inst_VOPC__V_CMPX_EQ_F16
10228
10229 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
10230 void
10231 Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
10232 {
10233 panicUnimplemented();
10234 }
10235
10236 Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt)
10237 : Inst_VOPC(iFmt, "v_cmpx_le_f16")
10238 {
10239 setFlag(ALU);
10240 setFlag(F16);
10241 } // Inst_VOPC__V_CMPX_LE_F16
10242
10243 Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16()
10244 {
10245 } // ~Inst_VOPC__V_CMPX_LE_F16
10246
10247 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
10248 void
10249 Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
10250 {
10251 panicUnimplemented();
10252 }
10253
10254 Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt)
10255 : Inst_VOPC(iFmt, "v_cmpx_gt_f16")
10256 {
10257 setFlag(ALU);
10258 setFlag(F16);
10259 } // Inst_VOPC__V_CMPX_GT_F16
10260
10261 Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16()
10262 {
10263 } // ~Inst_VOPC__V_CMPX_GT_F16
10264
10265 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
10266 void
10267 Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
10268 {
10269 panicUnimplemented();
10270 }
10271
10272 Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt)
10273 : Inst_VOPC(iFmt, "v_cmpx_lg_f16")
10274 {
10275 setFlag(ALU);
10276 setFlag(F16);
10277 } // Inst_VOPC__V_CMPX_LG_F16
10278
10279 Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16()
10280 {
10281 } // ~Inst_VOPC__V_CMPX_LG_F16
10282
10283 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
10284 void
10285 Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
10286 {
10287 panicUnimplemented();
10288 }
10289
10290 Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt)
10291 : Inst_VOPC(iFmt, "v_cmpx_ge_f16")
10292 {
10293 setFlag(ALU);
10294 setFlag(F16);
10295 } // Inst_VOPC__V_CMPX_GE_F16
10296
10297 Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16()
10298 {
10299 } // ~Inst_VOPC__V_CMPX_GE_F16
10300
10301 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
10302 void
10303 Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
10304 {
10305 panicUnimplemented();
10306 }
10307
10308 Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt)
10309 : Inst_VOPC(iFmt, "v_cmpx_o_f16")
10310 {
10311 setFlag(ALU);
10312 setFlag(F16);
10313 } // Inst_VOPC__V_CMPX_O_F16
10314
10315 Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16()
10316 {
10317 } // ~Inst_VOPC__V_CMPX_O_F16
10318
10319 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
10320 // encoding.
10321 void
10322 Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
10323 {
10324 panicUnimplemented();
10325 }
10326
10327 Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt)
10328 : Inst_VOPC(iFmt, "v_cmpx_u_f16")
10329 {
10330 setFlag(ALU);
10331 setFlag(F16);
10332 } // Inst_VOPC__V_CMPX_U_F16
10333
10334 Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16()
10335 {
10336 } // ~Inst_VOPC__V_CMPX_U_F16
10337
10338 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
10339 // encoding.
10340 void
10341 Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
10342 {
10343 panicUnimplemented();
10344 }
10345
10346 Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt)
10347 : Inst_VOPC(iFmt, "v_cmpx_nge_f16")
10348 {
10349 setFlag(ALU);
10350 setFlag(F16);
10351 } // Inst_VOPC__V_CMPX_NGE_F16
10352
10353 Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16()
10354 {
10355 } // ~Inst_VOPC__V_CMPX_NGE_F16
10356
10357 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
10358 void
10359 Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
10360 {
10361 panicUnimplemented();
10362 }
10363
10364 Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt)
10365 : Inst_VOPC(iFmt, "v_cmpx_nlg_f16")
10366 {
10367 setFlag(ALU);
10368 setFlag(F16);
10369 } // Inst_VOPC__V_CMPX_NLG_F16
10370
10371 Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16()
10372 {
10373 } // ~Inst_VOPC__V_CMPX_NLG_F16
10374
10375 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
10376 void
10377 Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
10378 {
10379 panicUnimplemented();
10380 }
10381
10382 Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt)
10383 : Inst_VOPC(iFmt, "v_cmpx_ngt_f16")
10384 {
10385 setFlag(ALU);
10386 setFlag(F16);
10387 } // Inst_VOPC__V_CMPX_NGT_F16
10388
10389 Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16()
10390 {
10391 } // ~Inst_VOPC__V_CMPX_NGT_F16
10392
10393 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
10394 void
10395 Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
10396 {
10397 panicUnimplemented();
10398 }
10399
10400 Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt)
10401 : Inst_VOPC(iFmt, "v_cmpx_nle_f16")
10402 {
10403 setFlag(ALU);
10404 setFlag(F16);
10405 } // Inst_VOPC__V_CMPX_NLE_F16
10406
10407 Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16()
10408 {
10409 } // ~Inst_VOPC__V_CMPX_NLE_F16
10410
10411 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
10412 void
10413 Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
10414 {
10415 panicUnimplemented();
10416 }
10417
10418 Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt)
10419 : Inst_VOPC(iFmt, "v_cmpx_neq_f16")
10420 {
10421 setFlag(ALU);
10422 setFlag(F16);
10423 } // Inst_VOPC__V_CMPX_NEQ_F16
10424
10425 Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16()
10426 {
10427 } // ~Inst_VOPC__V_CMPX_NEQ_F16
10428
10429 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
10430 void
10431 Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
10432 {
10433 panicUnimplemented();
10434 }
10435
10436 Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt)
10437 : Inst_VOPC(iFmt, "v_cmpx_nlt_f16")
10438 {
10439 setFlag(ALU);
10440 setFlag(F16);
10441 } // Inst_VOPC__V_CMPX_NLT_F16
10442
10443 Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16()
10444 {
10445 } // ~Inst_VOPC__V_CMPX_NLT_F16
10446
10447 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
10448 void
10449 Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
10450 {
10451 panicUnimplemented();
10452 }
10453
10454 Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt)
10455 : Inst_VOPC(iFmt, "v_cmpx_tru_f16")
10456 {
10457 setFlag(ALU);
10458 setFlag(F16);
10459 } // Inst_VOPC__V_CMPX_TRU_F16
10460
10461 Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16()
10462 {
10463 } // ~Inst_VOPC__V_CMPX_TRU_F16
10464
10465 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
10466 void
10467 Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
10468 {
10469 panicUnimplemented();
10470 }
10471
10472 Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt)
10473 : Inst_VOPC(iFmt, "v_cmp_f_f32")
10474 {
10475 setFlag(ALU);
10476 setFlag(F32);
10477 } // Inst_VOPC__V_CMP_F_F32
10478
10479 Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32()
10480 {
10481 } // ~Inst_VOPC__V_CMP_F_F32
10482
10483 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
10484 void
10485 Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
10486 {
10487 Wavefront *wf = gpuDynInst->wavefront();
10488 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10489
10490 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10491 if (wf->execMask(lane)) {
10492 vcc.setBit(lane, 0);
10493 }
10494 }
10495
10496 vcc.write();
10497 }
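        // The implemented VOPC compares all follow the pattern above: for
        // each lane whose EXEC bit is set, the per-lane predicate result
        // is written into bit <lane> of the 64-bit VCC operand with
        // setBit(), and vcc.write() then commits the whole mask at once.
        // Bits for inactive lanes are left untouched, so even the
        // constant compares (v_cmp_f_*, v_cmp_tru_*) only update the
        // active lanes' VCC bits.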
10498
10499 Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt)
10500 : Inst_VOPC(iFmt, "v_cmp_lt_f32")
10501 {
10502 setFlag(ALU);
10503 setFlag(F32);
10504 } // Inst_VOPC__V_CMP_LT_F32
10505
10506 Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32()
10507 {
10508 } // ~Inst_VOPC__V_CMP_LT_F32
10509
10510 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
10511 void
10512 Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
10513 {
10514 Wavefront *wf = gpuDynInst->wavefront();
10515 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10516 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10517 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10518
10519 src0.readSrc();
10520 src1.read();
10521
10522 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10523 if (wf->execMask(lane)) {
10524 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
10525 }
10526 }
10527
10528 vcc.write();
10529 }
10530
10531 Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt)
10532 : Inst_VOPC(iFmt, "v_cmp_eq_f32")
10533 {
10534 setFlag(ALU);
10535 setFlag(F32);
10536 } // Inst_VOPC__V_CMP_EQ_F32
10537
10538 Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32()
10539 {
10540 } // ~Inst_VOPC__V_CMP_EQ_F32
10541
10542 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
10543 void
10544 Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
10545 {
10546 Wavefront *wf = gpuDynInst->wavefront();
10547 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10548 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10549 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10550
10551 src0.readSrc();
10552 src1.read();
10553
10554 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10555 if (wf->execMask(lane)) {
10556 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
10557 }
10558 }
10559
10560 vcc.write();
10561 }
10562
10563 Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt)
10564 : Inst_VOPC(iFmt, "v_cmp_le_f32")
10565 {
10566 setFlag(ALU);
10567 setFlag(F32);
10568 } // Inst_VOPC__V_CMP_LE_F32
10569
10570 Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32()
10571 {
10572 } // ~Inst_VOPC__V_CMP_LE_F32
10573
10574 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
10575 void
10576 Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
10577 {
10578 Wavefront *wf = gpuDynInst->wavefront();
10579 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10580 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10581 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10582
10583 src0.readSrc();
10584 src1.read();
10585
10586 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10587 if (wf->execMask(lane)) {
10588 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
10589 }
10590 }
10591
10592 vcc.write();
10593 }
10594
10595 Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt)
10596 : Inst_VOPC(iFmt, "v_cmp_gt_f32")
10597 {
10598 setFlag(ALU);
10599 setFlag(F32);
10600 } // Inst_VOPC__V_CMP_GT_F32
10601
10602 Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32()
10603 {
10604 } // ~Inst_VOPC__V_CMP_GT_F32
10605
10606 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
10607 void
10608 Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
10609 {
10610 Wavefront *wf = gpuDynInst->wavefront();
10611 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10612 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10613 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10614
10615 src0.readSrc();
10616 src1.read();
10617
10618 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10619 if (wf->execMask(lane)) {
10620 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
10621 }
10622 }
10623
10624 vcc.write();
10625 }
10626
10627 Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt)
10628 : Inst_VOPC(iFmt, "v_cmp_lg_f32")
10629 {
10630 setFlag(ALU);
10631 setFlag(F32);
10632 } // Inst_VOPC__V_CMP_LG_F32
10633
10634 Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32()
10635 {
10636 } // ~Inst_VOPC__V_CMP_LG_F32
10637
10638 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
10639 void
10640 Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
10641 {
10642 Wavefront *wf = gpuDynInst->wavefront();
10643 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10644 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10645 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10646
10647 src0.readSrc();
10648 src1.read();
10649
10650 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10651 if (wf->execMask(lane)) {
10652 vcc.setBit(lane, (src0[lane] < src1[lane]
10653 || src0[lane] > src1[lane]) ? 1 : 0);
10654 }
10655 }
10656
10657 vcc.write();
10658 }
10659
10660 Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt)
10661 : Inst_VOPC(iFmt, "v_cmp_ge_f32")
10662 {
10663 setFlag(ALU);
10664 setFlag(F32);
10665 } // Inst_VOPC__V_CMP_GE_F32
10666
10667 Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32()
10668 {
10669 } // ~Inst_VOPC__V_CMP_GE_F32
10670
10671 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
10672 void
10673 Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
10674 {
10675 Wavefront *wf = gpuDynInst->wavefront();
10676 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10677 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10678 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10679
10680 src0.readSrc();
10681 src1.read();
10682
10683 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10684 if (wf->execMask(lane)) {
10685 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
10686 }
10687 }
10688
10689 vcc.write();
10690 }
10691
10692 Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt)
10693 : Inst_VOPC(iFmt, "v_cmp_o_f32")
10694 {
10695 setFlag(ALU);
10696 setFlag(F32);
10697 } // Inst_VOPC__V_CMP_O_F32
10698
10699 Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32()
10700 {
10701 } // ~Inst_VOPC__V_CMP_O_F32
10702
10703 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
10704 void
10705 Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
10706 {
10707 Wavefront *wf = gpuDynInst->wavefront();
10708 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10709 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10710 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10711
10712 src0.readSrc();
10713 src1.read();
10714
10715 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10716 if (wf->execMask(lane)) {
10717 vcc.setBit(lane, (!std::isnan(src0[lane])
10718 && !std::isnan(src1[lane])) ? 1 : 0);
10719 }
10720 }
10721
10722 vcc.write();
10723 }
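        // v_cmp_o_* ("ordered") is true only when neither source is a
        // NaN; the v_cmp_u_* ("unordered") compare below is its
        // complement. Both are implemented here with std::isnan on each
        // lane's source values.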
10724
10725 Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt)
10726 : Inst_VOPC(iFmt, "v_cmp_u_f32")
10727 {
10728 setFlag(ALU);
10729 setFlag(F32);
10730 } // Inst_VOPC__V_CMP_U_F32
10731
10732 Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32()
10733 {
10734 } // ~Inst_VOPC__V_CMP_U_F32
10735
10736 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
10737 void
10738 Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
10739 {
10740 Wavefront *wf = gpuDynInst->wavefront();
10741 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10742 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10743 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10744
10745 src0.readSrc();
10746 src1.read();
10747
10748 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10749 if (wf->execMask(lane)) {
10750 vcc.setBit(lane, (std::isnan(src0[lane])
10751 || std::isnan(src1[lane])) ? 1 : 0);
10752 }
10753 }
10754
10755 vcc.write();
10756 }
10757
10758 Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt)
10759 : Inst_VOPC(iFmt, "v_cmp_nge_f32")
10760 {
10761 setFlag(ALU);
10762 setFlag(F32);
10763 } // Inst_VOPC__V_CMP_NGE_F32
10764
10765 Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32()
10766 {
10767 } // ~Inst_VOPC__V_CMP_NGE_F32
10768
10769 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
10770 void
10771 Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
10772 {
10773 Wavefront *wf = gpuDynInst->wavefront();
10774 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10775 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10776 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10777
10778 src0.readSrc();
10779 src1.read();
10780
10781 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10782 if (wf->execMask(lane)) {
10783 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
10784 }
10785 }
10786
10787 vcc.write();
10788 }
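        // The negated compares (nge, nlg, ngt, nle, nlt) negate the whole
        // comparison expression rather than using the opposite operator,
        // which matters for NaN inputs: e.g. !(S0 >= S1) is true when
        // either source is a NaN, whereas (S0 < S1) would be false.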
10789
10790 Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt)
10791 : Inst_VOPC(iFmt, "v_cmp_nlg_f32")
10792 {
10793 setFlag(ALU);
10794 setFlag(F32);
10795 } // Inst_VOPC__V_CMP_NLG_F32
10796
10797 Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32()
10798 {
10799 } // ~Inst_VOPC__V_CMP_NLG_F32
10800
10801 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
10802 void
10803 Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
10804 {
10805 Wavefront *wf = gpuDynInst->wavefront();
10806 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10807 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10808 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10809
10810 src0.readSrc();
10811 src1.read();
10812
10813 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10814 if (wf->execMask(lane)) {
10815 vcc.setBit(lane, !(src0[lane] < src1[lane]
10816 || src0[lane] > src1[lane]) ? 1 : 0);
10817 }
10818 }
10819
10820 vcc.write();
10821 }
10822
10823 Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt)
10824 : Inst_VOPC(iFmt, "v_cmp_ngt_f32")
10825 {
10826 setFlag(ALU);
10827 setFlag(F32);
10828 } // Inst_VOPC__V_CMP_NGT_F32
10829
10830 Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32()
10831 {
10832 } // ~Inst_VOPC__V_CMP_NGT_F32
10833
10834 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
10835 void
10836 Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
10837 {
10838 Wavefront *wf = gpuDynInst->wavefront();
10839 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10840 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10841 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10842
10843 src0.readSrc();
10844 src1.read();
10845
10846 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10847 if (wf->execMask(lane)) {
10848 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
10849 }
10850 }
10851
10852 vcc.write();
10853 }
10854
10855 Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt)
10856 : Inst_VOPC(iFmt, "v_cmp_nle_f32")
10857 {
10858 setFlag(ALU);
10859 setFlag(F32);
10860 } // Inst_VOPC__V_CMP_NLE_F32
10861
10862 Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32()
10863 {
10864 } // ~Inst_VOPC__V_CMP_NLE_F32
10865
10866 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
10867 void
10868 Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
10869 {
10870 Wavefront *wf = gpuDynInst->wavefront();
10871 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10872 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10873 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10874
10875 src0.readSrc();
10876 src1.read();
10877
10878 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10879 if (wf->execMask(lane)) {
10880 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
10881 }
10882 }
10883
10884 vcc.write();
10885 }
10886
10887 Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt)
10888 : Inst_VOPC(iFmt, "v_cmp_neq_f32")
10889 {
10890 setFlag(ALU);
10891 setFlag(F32);
10892 } // Inst_VOPC__V_CMP_NEQ_F32
10893
10894 Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32()
10895 {
10896 } // ~Inst_VOPC__V_CMP_NEQ_F32
10897
10898 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
10899 void
10900 Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
10901 {
10902 Wavefront *wf = gpuDynInst->wavefront();
10903 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10904 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10905 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10906
10907 src0.readSrc();
10908 src1.read();
10909
10910 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10911 if (wf->execMask(lane)) {
10912 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
10913 }
10914 }
10915
10916 vcc.write();
10917 }
10918
10919 Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt)
10920 : Inst_VOPC(iFmt, "v_cmp_nlt_f32")
10921 {
10922 setFlag(ALU);
10923 setFlag(F32);
10924 } // Inst_VOPC__V_CMP_NLT_F32
10925
10926 Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32()
10927 {
10928 } // ~Inst_VOPC__V_CMP_NLT_F32
10929
10930 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
10931 void
10932 Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
10933 {
10934 Wavefront *wf = gpuDynInst->wavefront();
10935 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10936 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10937 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10938
10939 src0.readSrc();
10940 src1.read();
10941
10942 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10943 if (wf->execMask(lane)) {
10944 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
10945 }
10946 }
10947
10948 vcc.write();
10949 }
10950
10951 Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt)
10952 : Inst_VOPC(iFmt, "v_cmp_tru_f32")
10953 {
10954 setFlag(ALU);
10955 setFlag(F32);
10956 } // Inst_VOPC__V_CMP_TRU_F32
10957
10958 Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32()
10959 {
10960 } // ~Inst_VOPC__V_CMP_TRU_F32
10961
10962 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
10963 void
10964 Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
10965 {
10966 Wavefront *wf = gpuDynInst->wavefront();
10967 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10968
10969 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10970 if (wf->execMask(lane)) {
10971 vcc.setBit(lane, 1);
10972 }
10973 }
10974
10975 vcc.write();
10976 }
10977
10978 Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt)
10979 : Inst_VOPC(iFmt, "v_cmpx_f_f32")
10980 {
10981 setFlag(ALU);
10982 setFlag(F32);
10983 } // Inst_VOPC__V_CMPX_F_F32
10984
10985 Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32()
10986 {
10987 } // ~Inst_VOPC__V_CMPX_F_F32
10988
10989 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
10990 void
10991 Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
10992 {
10993 Wavefront *wf = gpuDynInst->wavefront();
10994 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10995
10996 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10997 if (wf->execMask(lane)) {
10998 vcc.setBit(lane, 0);
10999 }
11000 }
11001
11002 vcc.write();
11003 wf->execMask() = vcc.rawData();
11004 }
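        // The v_cmpx_* variants extend the plain compares by also copying
        // the freshly computed VCC mask into the wavefront's EXEC mask
        // (wf->execMask() = vcc.rawData()), so lanes that fail the
        // compare are disabled for subsequent vector instructions.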
11005
11006 Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt)
11007 : Inst_VOPC(iFmt, "v_cmpx_lt_f32")
11008 {
11009 setFlag(ALU);
11010 setFlag(F32);
11011 } // Inst_VOPC__V_CMPX_LT_F32
11012
11013 Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32()
11014 {
11015 } // ~Inst_VOPC__V_CMPX_LT_F32
11016
11017 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
11018 void
11019 Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
11020 {
11021 Wavefront *wf = gpuDynInst->wavefront();
11022 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11023 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11024 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11025
11026 src0.readSrc();
11027 src1.read();
11028
11029 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11030 if (wf->execMask(lane)) {
11031 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
11032 }
11033 }
11034
11035 vcc.write();
11036 wf->execMask() = vcc.rawData();
11037 }
11038
11039 Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt)
11040 : Inst_VOPC(iFmt, "v_cmpx_eq_f32")
11041 {
11042 setFlag(ALU);
11043 setFlag(F32);
11044 } // Inst_VOPC__V_CMPX_EQ_F32
11045
11046 Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32()
11047 {
11048 } // ~Inst_VOPC__V_CMPX_EQ_F32
11049
11050 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
11051 void
11052 Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
11053 {
11054 Wavefront *wf = gpuDynInst->wavefront();
11055 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11056 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11057 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11058
11059 src0.readSrc();
11060 src1.read();
11061
11062 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11063 if (wf->execMask(lane)) {
11064 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
11065 }
11066 }
11067
11068 vcc.write();
11069 wf->execMask() = vcc.rawData();
11070 }
11071
11072 Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt)
11073 : Inst_VOPC(iFmt, "v_cmpx_le_f32")
11074 {
11075 setFlag(ALU);
11076 setFlag(F32);
11077 } // Inst_VOPC__V_CMPX_LE_F32
11078
11079 Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32()
11080 {
11081 } // ~Inst_VOPC__V_CMPX_LE_F32
11082
11083 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
11084 void
11085 Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
11086 {
11087 Wavefront *wf = gpuDynInst->wavefront();
11088 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11089 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11090 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11091
11092 src0.readSrc();
11093 src1.read();
11094
11095 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11096 if (wf->execMask(lane)) {
11097 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
11098 }
11099 }
11100
11101 vcc.write();
11102 wf->execMask() = vcc.rawData();
11103 }
11104
11105 Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt)
11106 : Inst_VOPC(iFmt, "v_cmpx_gt_f32")
11107 {
11108 setFlag(ALU);
11109 setFlag(F32);
11110 } // Inst_VOPC__V_CMPX_GT_F32
11111
11112 Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32()
11113 {
11114 } // ~Inst_VOPC__V_CMPX_GT_F32
11115
11116 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
11117 void
11118 Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
11119 {
11120 Wavefront *wf = gpuDynInst->wavefront();
11121 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11122 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11123 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11124
11125 src0.readSrc();
11126 src1.read();
11127
11128 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11129 if (wf->execMask(lane)) {
11130 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
11131 }
11132 }
11133
11134 vcc.write();
11135 wf->execMask() = vcc.rawData();
11136 }
11137
11138 Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt)
11139 : Inst_VOPC(iFmt, "v_cmpx_lg_f32")
11140 {
11141 setFlag(ALU);
11142 setFlag(F32);
11143 } // Inst_VOPC__V_CMPX_LG_F32
11144
11145 Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32()
11146 {
11147 } // ~Inst_VOPC__V_CMPX_LG_F32
11148
11149 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
11150 void
11151 Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
11152 {
11153 Wavefront *wf = gpuDynInst->wavefront();
11154 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11155 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11156 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11157
11158 src0.readSrc();
11159 src1.read();
11160
11161 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11162 if (wf->execMask(lane)) {
11163 vcc.setBit(lane, (src0[lane] < src1[lane]
11164 || src0[lane] > src1[lane]) ? 1 : 0);
11165 }
11166 }
11167
11168 vcc.write();
11169 wf->execMask() = vcc.rawData();
11170 }
11171
11172 Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt)
11173 : Inst_VOPC(iFmt, "v_cmpx_ge_f32")
11174 {
11175 setFlag(ALU);
11176 setFlag(F32);
11177 } // Inst_VOPC__V_CMPX_GE_F32
11178
11179 Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32()
11180 {
11181 } // ~Inst_VOPC__V_CMPX_GE_F32
11182
11183 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
11184 void
11185 Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
11186 {
11187 Wavefront *wf = gpuDynInst->wavefront();
11188 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11189 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11190 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11191
11192 src0.readSrc();
11193 src1.read();
11194
11195 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11196 if (wf->execMask(lane)) {
11197 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
11198 }
11199 }
11200
11201 vcc.write();
11202 wf->execMask() = vcc.rawData();
11203 }
11204
11205 Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt)
11206 : Inst_VOPC(iFmt, "v_cmpx_o_f32")
11207 {
11208 setFlag(ALU);
11209 setFlag(F32);
11210 } // Inst_VOPC__V_CMPX_O_F32
11211
11212 Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32()
11213 {
11214 } // ~Inst_VOPC__V_CMPX_O_F32
11215
11216 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
11217 // encoding.
11218 void
11219 Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
11220 {
11221 Wavefront *wf = gpuDynInst->wavefront();
11222 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11223 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11224 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11225
11226 src0.readSrc();
11227 src1.read();
11228
11229 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11230 if (wf->execMask(lane)) {
11231 vcc.setBit(lane, (!std::isnan(src0[lane])
11232 && !std::isnan(src1[lane])) ? 1 : 0);
11233 }
11234 }
11235
11236 vcc.write();
11237 wf->execMask() = vcc.rawData();
11238 }
11239
11240 Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt)
11241 : Inst_VOPC(iFmt, "v_cmpx_u_f32")
11242 {
11243 setFlag(ALU);
11244 setFlag(F32);
11245 } // Inst_VOPC__V_CMPX_U_F32
11246
11247 Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32()
11248 {
11249 } // ~Inst_VOPC__V_CMPX_U_F32
11250
11251 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
11252 // encoding.
11253 void
11254 Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
11255 {
11256 Wavefront *wf = gpuDynInst->wavefront();
11257 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11258 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11259 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11260
11261 src0.readSrc();
11262 src1.read();
11263
11264 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11265 if (wf->execMask(lane)) {
11266 vcc.setBit(lane, (std::isnan(src0[lane])
11267 || std::isnan(src1[lane])) ? 1 : 0);
11268 }
11269 }
11270
11271 vcc.write();
11272 wf->execMask() = vcc.rawData();
11273 }
11274
11275 Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt)
11276 : Inst_VOPC(iFmt, "v_cmpx_nge_f32")
11277 {
11278 setFlag(ALU);
11279 setFlag(F32);
11280 } // Inst_VOPC__V_CMPX_NGE_F32
11281
11282 Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32()
11283 {
11284 } // ~Inst_VOPC__V_CMPX_NGE_F32
11285
11286 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
11287 void
11288 Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
11289 {
11290 Wavefront *wf = gpuDynInst->wavefront();
11291 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11292 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11293 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11294
11295 src0.readSrc();
11296 src1.read();
11297
11298 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11299 if (wf->execMask(lane)) {
11300 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
11301 }
11302 }
11303
11304 vcc.write();
11305 wf->execMask() = vcc.rawData();
11306 }
11307
11308 Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt)
11309 : Inst_VOPC(iFmt, "v_cmpx_nlg_f32")
11310 {
11311 setFlag(ALU);
11312 setFlag(F32);
11313 } // Inst_VOPC__V_CMPX_NLG_F32
11314
11315 Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32()
11316 {
11317 } // ~Inst_VOPC__V_CMPX_NLG_F32
11318
11319 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
11320 void
11321 Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
11322 {
11323 Wavefront *wf = gpuDynInst->wavefront();
11324 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11325 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11326 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11327
11328 src0.readSrc();
11329 src1.read();
11330
11331 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11332 if (wf->execMask(lane)) {
11333 vcc.setBit(lane, !(src0[lane] < src1[lane]
11334 || src0[lane] > src1[lane]) ? 1 : 0);
11335 }
11336 }
11337
11338 vcc.write();
11339 wf->execMask() = vcc.rawData();
11340 }
11341
11342 Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt)
11343 : Inst_VOPC(iFmt, "v_cmpx_ngt_f32")
11344 {
11345 setFlag(ALU);
11346 setFlag(F32);
11347 } // Inst_VOPC__V_CMPX_NGT_F32
11348
11349 Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32()
11350 {
11351 } // ~Inst_VOPC__V_CMPX_NGT_F32
11352
11353 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
11354 void
11355 Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
11356 {
11357 Wavefront *wf = gpuDynInst->wavefront();
11358 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11359 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11360 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11361
11362 src0.readSrc();
11363 src1.read();
11364
11365 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11366 if (wf->execMask(lane)) {
11367 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
11368 }
11369 }
11370
11371 vcc.write();
11372 wf->execMask() = vcc.rawData();
11373 }
11374
11375 Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt)
11376 : Inst_VOPC(iFmt, "v_cmpx_nle_f32")
11377 {
11378 setFlag(ALU);
11379 setFlag(F32);
11380 } // Inst_VOPC__V_CMPX_NLE_F32
11381
11382 Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32()
11383 {
11384 } // ~Inst_VOPC__V_CMPX_NLE_F32
11385
11386 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
11387 void
11388 Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
11389 {
11390 Wavefront *wf = gpuDynInst->wavefront();
11391 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11392 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11394
11395 src0.readSrc();
11396 src1.read();
11397
11398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11399 if (wf->execMask(lane)) {
11400 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
11401 }
11402 }
11403
11404 vcc.write();
11405 wf->execMask() = vcc.rawData();
11406 }
11407
11408 Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt)
11409 : Inst_VOPC(iFmt, "v_cmpx_neq_f32")
11410 {
11411 setFlag(ALU);
11412 setFlag(F32);
11413 } // Inst_VOPC__V_CMPX_NEQ_F32
11414
11415 Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32()
11416 {
11417 } // ~Inst_VOPC__V_CMPX_NEQ_F32
11418
11419 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
11420 void
11421 Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
11422 {
11423 Wavefront *wf = gpuDynInst->wavefront();
11424 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11425 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11426 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11427
11428 src0.readSrc();
11429 src1.read();
11430
11431 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11432 if (wf->execMask(lane)) {
11433 vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0);
11434 }
11435 }
11436
11437              vcc.write();
                   wf->execMask() = vcc.rawData();
11438          }
11439
11440 Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt)
11441 : Inst_VOPC(iFmt, "v_cmpx_nlt_f32")
11442 {
11443 setFlag(ALU);
11444 setFlag(F32);
11445 } // Inst_VOPC__V_CMPX_NLT_F32
11446
11447 Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32()
11448 {
11449 } // ~Inst_VOPC__V_CMPX_NLT_F32
11450
11451 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
11452 void
11453 Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
11454 {
11455 Wavefront *wf = gpuDynInst->wavefront();
11456 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11457 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11458 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11459
11460 src0.readSrc();
11461 src1.read();
11462
11463 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11464 if (wf->execMask(lane)) {
11465 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
11466 }
11467 }
11468
11469 vcc.write();
11470 wf->execMask() = vcc.rawData();
11471 }
11472
11473 Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt)
11474 : Inst_VOPC(iFmt, "v_cmpx_tru_f32")
11475 {
11476 setFlag(ALU);
11477 setFlag(F32);
11478 } // Inst_VOPC__V_CMPX_TRU_F32
11479
11480 Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32()
11481 {
11482 } // ~Inst_VOPC__V_CMPX_TRU_F32
11483
11484 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
11485 void
11486 Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
11487 {
11488 Wavefront *wf = gpuDynInst->wavefront();
11489 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11490
11491 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11492 if (wf->execMask(lane)) {
11493 vcc.setBit(lane, 1);
11494 }
11495 }
11496
11497 vcc.write();
11498 wf->execMask() = vcc.rawData();
11499 }
11500
11501 Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt)
11502 : Inst_VOPC(iFmt, "v_cmp_f_f64")
11503 {
11504 setFlag(ALU);
11505 setFlag(F64);
11506 } // Inst_VOPC__V_CMP_F_F64
11507
11508 Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64()
11509 {
11510 } // ~Inst_VOPC__V_CMP_F_F64
11511
11512 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
11513 void
11514 Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
11515 {
11516 Wavefront *wf = gpuDynInst->wavefront();
11517 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11518
11519 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11520 if (wf->execMask(lane)) {
11521 vcc.setBit(lane, 0);
11522 }
11523 }
11524
11525 vcc.write();
11526 }
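        // The f64 compares repeat the f32 control flow; only the operand
        // type changes to ConstVecOperandF64, which reads each lane's
        // 64-bit value across a pair of 32-bit VGPRs.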
11527
11528 Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt)
11529 : Inst_VOPC(iFmt, "v_cmp_lt_f64")
11530 {
11531 setFlag(ALU);
11532 setFlag(F64);
11533 } // Inst_VOPC__V_CMP_LT_F64
11534
11535 Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64()
11536 {
11537 } // ~Inst_VOPC__V_CMP_LT_F64
11538
11539 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
11540 void
11541 Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
11542 {
11543 Wavefront *wf = gpuDynInst->wavefront();
11544 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11545 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11546 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11547
11548 src0.readSrc();
11549 src1.read();
11550
11551 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11552 if (wf->execMask(lane)) {
11553 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
11554 }
11555 }
11556
11557 vcc.write();
11558 }
11559
11560 Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt)
11561 : Inst_VOPC(iFmt, "v_cmp_eq_f64")
11562 {
11563 setFlag(ALU);
11564 setFlag(F64);
11565 } // Inst_VOPC__V_CMP_EQ_F64
11566
11567 Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64()
11568 {
11569 } // ~Inst_VOPC__V_CMP_EQ_F64
11570
11571 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
11572 void
11573 Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
11574 {
11575 Wavefront *wf = gpuDynInst->wavefront();
11576 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11577 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11578 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11579
11580 src0.readSrc();
11581 src1.read();
11582
11583 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11584 if (wf->execMask(lane)) {
11585 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
11586 }
11587 }
11588
11589 vcc.write();
11590 }
11591
11592 Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt)
11593 : Inst_VOPC(iFmt, "v_cmp_le_f64")
11594 {
11595 setFlag(ALU);
11596 setFlag(F64);
11597 } // Inst_VOPC__V_CMP_LE_F64
11598
11599 Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64()
11600 {
11601 } // ~Inst_VOPC__V_CMP_LE_F64
11602
11603 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
11604 void
11605 Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
11606 {
11607 Wavefront *wf = gpuDynInst->wavefront();
11608 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11609 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11610 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11611
11612 src0.readSrc();
11613 src1.read();
11614
11615 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11616 if (wf->execMask(lane)) {
11617 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
11618 }
11619 }
11620
11621 vcc.write();
11622 }
11623
11624 Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt)
11625 : Inst_VOPC(iFmt, "v_cmp_gt_f64")
11626 {
11627 setFlag(ALU);
11628 setFlag(F64);
11629 } // Inst_VOPC__V_CMP_GT_F64
11630
11631 Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64()
11632 {
11633 } // ~Inst_VOPC__V_CMP_GT_F64
11634
11635 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
11636 void
11637 Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
11638 {
11639 Wavefront *wf = gpuDynInst->wavefront();
11640 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11641 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11642 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11643
11644 src0.readSrc();
11645 src1.read();
11646
11647 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11648 if (wf->execMask(lane)) {
11649 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
11650 }
11651 }
11652
11653 vcc.write();
11654 }
11655
11656 Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt)
11657 : Inst_VOPC(iFmt, "v_cmp_lg_f64")
11658 {
11659 setFlag(ALU);
11660 setFlag(F64);
11661 } // Inst_VOPC__V_CMP_LG_F64
11662
11663 Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64()
11664 {
11665 } // ~Inst_VOPC__V_CMP_LG_F64
11666
11667 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
11668 void
11669 Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
11670 {
11671 Wavefront *wf = gpuDynInst->wavefront();
11672 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11673 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11674 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11675
11676 src0.readSrc();
11677 src1.read();
11678
11679 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11680 if (wf->execMask(lane)) {
11681 vcc.setBit(lane, (src0[lane] < src1[lane]
11682 || src0[lane] > src1[lane]) ? 1 : 0);
11683 }
11684 }
11685
11686 vcc.write();
11687 }
11688
11689 Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt)
11690 : Inst_VOPC(iFmt, "v_cmp_ge_f64")
11691 {
11692 setFlag(ALU);
11693 setFlag(F64);
11694 } // Inst_VOPC__V_CMP_GE_F64
11695
11696 Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64()
11697 {
11698 } // ~Inst_VOPC__V_CMP_GE_F64
11699
11700 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
11701 void
11702 Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
11703 {
11704 Wavefront *wf = gpuDynInst->wavefront();
11705 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11706 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11707 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11708
11709 src0.readSrc();
11710 src1.read();
11711
11712 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11713 if (wf->execMask(lane)) {
11714 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
11715 }
11716 }
11717
11718 vcc.write();
11719 }
11720
11721 Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt)
11722 : Inst_VOPC(iFmt, "v_cmp_o_f64")
11723 {
11724 setFlag(ALU);
11725 setFlag(F64);
11726 } // Inst_VOPC__V_CMP_O_F64
11727
11728 Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64()
11729 {
11730 } // ~Inst_VOPC__V_CMP_O_F64
11731
11732 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
11733 void
11734 Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
11735 {
11736 Wavefront *wf = gpuDynInst->wavefront();
11737 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11738 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11739 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11740
11741 src0.readSrc();
11742 src1.read();
11743
11744 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11745 if (wf->execMask(lane)) {
11746 vcc.setBit(lane, (!std::isnan(src0[lane])
11747 && !std::isnan(src1[lane])) ? 1 : 0);
11748 }
11749 }
11750
11751 vcc.write();
11752 }
11753
11754 Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt)
11755 : Inst_VOPC(iFmt, "v_cmp_u_f64")
11756 {
11757 setFlag(ALU);
11758 setFlag(F64);
11759 } // Inst_VOPC__V_CMP_U_F64
11760
11761 Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64()
11762 {
11763 } // ~Inst_VOPC__V_CMP_U_F64
11764
11765 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
11766 void
11767 Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
11768 {
11769 Wavefront *wf = gpuDynInst->wavefront();
11770 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11771 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11772 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11773
11774 src0.readSrc();
11775 src1.read();
11776
11777 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11778 if (wf->execMask(lane)) {
11779 vcc.setBit(lane, (std::isnan(src0[lane])
11780 || std::isnan(src1[lane])) ? 1 : 0);
11781 }
11782 }
11783
11784 vcc.write();
11785 }
11786
11787 Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt)
11788 : Inst_VOPC(iFmt, "v_cmp_nge_f64")
11789 {
11790 setFlag(ALU);
11791 setFlag(F64);
11792 } // Inst_VOPC__V_CMP_NGE_F64
11793
11794 Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64()
11795 {
11796 } // ~Inst_VOPC__V_CMP_NGE_F64
11797
11798 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
11799 void
11800 Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
11801 {
11802 Wavefront *wf = gpuDynInst->wavefront();
11803 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11804 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11805 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11806
11807 src0.readSrc();
11808 src1.read();
11809
11810 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11811 if (wf->execMask(lane)) {
11812 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
11813 }
11814 }
11815
11816 vcc.write();
11817 }
11818
11819 Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt)
11820 : Inst_VOPC(iFmt, "v_cmp_nlg_f64")
11821 {
11822 setFlag(ALU);
11823 setFlag(F64);
11824 } // Inst_VOPC__V_CMP_NLG_F64
11825
11826 Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64()
11827 {
11828 } // ~Inst_VOPC__V_CMP_NLG_F64
11829
11830 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
11831 void
11832 Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
11833 {
11834 Wavefront *wf = gpuDynInst->wavefront();
11835 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11836 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11837 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11838
11839 src0.readSrc();
11840 src1.read();
11841
11842 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11843 if (wf->execMask(lane)) {
11844 vcc.setBit(lane, !(src0[lane] < src1[lane]
11845 || src0[lane] > src1[lane]) ? 1 : 0);
11846 }
11847 }
11848
11849 vcc.write();
11850 }
11851
11852 Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt)
11853 : Inst_VOPC(iFmt, "v_cmp_ngt_f64")
11854 {
11855 setFlag(ALU);
11856 setFlag(F64);
11857 } // Inst_VOPC__V_CMP_NGT_F64
11858
11859 Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64()
11860 {
11861 } // ~Inst_VOPC__V_CMP_NGT_F64
11862
11863 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
11864 void
11865 Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
11866 {
11867 Wavefront *wf = gpuDynInst->wavefront();
11868 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11869 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11870 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11871
11872 src0.readSrc();
11873 src1.read();
11874
11875 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11876 if (wf->execMask(lane)) {
11877 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
11878 }
11879 }
11880
11881 vcc.write();
11882 }
11883
11884 Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt)
11885 : Inst_VOPC(iFmt, "v_cmp_nle_f64")
11886 {
11887 setFlag(ALU);
11888 setFlag(F64);
11889 } // Inst_VOPC__V_CMP_NLE_F64
11890
11891 Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64()
11892 {
11893 } // ~Inst_VOPC__V_CMP_NLE_F64
11894
11895 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
11896 void
11897 Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
11898 {
11899 Wavefront *wf = gpuDynInst->wavefront();
11900 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11901 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11902 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11903
11904 src0.readSrc();
11905 src1.read();
11906
11907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11908 if (wf->execMask(lane)) {
11909 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
11910 }
11911 }
11912
11913 vcc.write();
11914 }
11915
11916 Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt)
11917 : Inst_VOPC(iFmt, "v_cmp_neq_f64")
11918 {
11919 setFlag(ALU);
11920 setFlag(F64);
11921 } // Inst_VOPC__V_CMP_NEQ_F64
11922
11923 Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64()
11924 {
11925 } // ~Inst_VOPC__V_CMP_NEQ_F64
11926
11927 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
11928 void
11929 Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
11930 {
11931 Wavefront *wf = gpuDynInst->wavefront();
11932 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11933 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11934 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11935
11936 src0.readSrc();
11937 src1.read();
11938
11939 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11940 if (wf->execMask(lane)) {
11941 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
11942 }
11943 }
11944
11945 vcc.write();
11946 }
11947
11948 Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt)
11949 : Inst_VOPC(iFmt, "v_cmp_nlt_f64")
11950 {
11951 setFlag(ALU);
11952 setFlag(F64);
11953 } // Inst_VOPC__V_CMP_NLT_F64
11954
11955 Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64()
11956 {
11957 } // ~Inst_VOPC__V_CMP_NLT_F64
11958
11959 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
11960 void
11961 Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
11962 {
11963 Wavefront *wf = gpuDynInst->wavefront();
11964 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11965 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11966 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11967
11968 src0.readSrc();
11969 src1.read();
11970
11971 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11972 if (wf->execMask(lane)) {
11973 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
11974 }
11975 }
11976
11977 vcc.write();
11978 }
11979
11980 Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt)
11981 : Inst_VOPC(iFmt, "v_cmp_tru_f64")
11982 {
11983 setFlag(ALU);
11984 setFlag(F64);
11985 } // Inst_VOPC__V_CMP_TRU_F64
11986
11987 Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64()
11988 {
11989 } // ~Inst_VOPC__V_CMP_TRU_F64
11990
11991 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
11992 void
11993 Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
11994 {
11995 Wavefront *wf = gpuDynInst->wavefront();
11996 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11997
11998 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11999 if (wf->execMask(lane)) {
12000 vcc.setBit(lane, 1);
12001 }
12002 }
12003
12004 vcc.write();
12005 }
12006
12007 Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt)
12008 : Inst_VOPC(iFmt, "v_cmpx_f_f64")
12009 {
12010 setFlag(ALU);
12011 setFlag(F64);
12012 } // Inst_VOPC__V_CMPX_F_F64
12013
12014 Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64()
12015 {
12016 } // ~Inst_VOPC__V_CMPX_F_F64
12017
12018 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
12019 void
12020 Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
12021 {
12022 Wavefront *wf = gpuDynInst->wavefront();
12023 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12024
12025 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12026 if (wf->execMask(lane)) {
12027 vcc.setBit(lane, 0);
12028 }
12029 }
12030
12031 vcc.write();
12032 wf->execMask() = vcc.rawData();
12033 }
12034
12035 Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt)
12036 : Inst_VOPC(iFmt, "v_cmpx_lt_f64")
12037 {
12038 setFlag(ALU);
12039 setFlag(F64);
12040 } // Inst_VOPC__V_CMPX_LT_F64
12041
12042 Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64()
12043 {
12044 } // ~Inst_VOPC__V_CMPX_LT_F64
12045
12046 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
12047 void
12048 Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
12049 {
12050 Wavefront *wf = gpuDynInst->wavefront();
12051 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12052 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12053 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12054
12055 src0.readSrc();
12056 src1.read();
12057
12058 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12059 if (wf->execMask(lane)) {
12060 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
12061 }
12062 }
12063
12064 wf->execMask() = vcc.rawData();
12065 vcc.write();
12066 }
12067
12068 Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt)
12069 : Inst_VOPC(iFmt, "v_cmpx_eq_f64")
12070 {
12071 setFlag(ALU);
12072 setFlag(F64);
12073 } // Inst_VOPC__V_CMPX_EQ_F64
12074
12075 Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64()
12076 {
12077 } // ~Inst_VOPC__V_CMPX_EQ_F64
12078
12079 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
12080 void
12081 Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
12082 {
12083 Wavefront *wf = gpuDynInst->wavefront();
12084 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12085 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12086 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12087
12088 src0.readSrc();
12089 src1.read();
12090
12091 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12092 if (wf->execMask(lane)) {
12093 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
12094 }
12095 }
12096
12097 wf->execMask() = vcc.rawData();
12098 vcc.write();
12099 }
12100
12101 Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt)
12102 : Inst_VOPC(iFmt, "v_cmpx_le_f64")
12103 {
12104 setFlag(ALU);
12105 setFlag(F64);
12106 } // Inst_VOPC__V_CMPX_LE_F64
12107
12108 Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64()
12109 {
12110 } // ~Inst_VOPC__V_CMPX_LE_F64
12111
12112 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
12113 void
12114 Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
12115 {
12116 Wavefront *wf = gpuDynInst->wavefront();
12117 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12118 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12119 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12120
12121 src0.readSrc();
12122 src1.read();
12123
12124 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12125 if (wf->execMask(lane)) {
12126 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
12127 }
12128 }
12129
12130 wf->execMask() = vcc.rawData();
12131 vcc.write();
12132 }
12133
12134 Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt)
12135 : Inst_VOPC(iFmt, "v_cmpx_gt_f64")
12136 {
12137 setFlag(ALU);
12138 setFlag(F64);
12139 } // Inst_VOPC__V_CMPX_GT_F64
12140
12141 Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64()
12142 {
12143 } // ~Inst_VOPC__V_CMPX_GT_F64
12144
12145 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
12146 void
12147 Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
12148 {
12149 Wavefront *wf = gpuDynInst->wavefront();
12150 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12151 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12152 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12153
12154 src0.readSrc();
12155 src1.read();
12156
12157 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12158 if (wf->execMask(lane)) {
12159 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
12160 }
12161 }
12162
12163 wf->execMask() = vcc.rawData();
12164 vcc.write();
12165 }
12166
12167 Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt)
12168 : Inst_VOPC(iFmt, "v_cmpx_lg_f64")
12169 {
12170 setFlag(ALU);
12171 setFlag(F64);
12172 } // Inst_VOPC__V_CMPX_LG_F64
12173
12174 Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64()
12175 {
12176 } // ~Inst_VOPC__V_CMPX_LG_F64
12177
12178 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
12179 void
12180 Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
12181 {
12182 Wavefront *wf = gpuDynInst->wavefront();
12183 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12184 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12185 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12186
12187 src0.readSrc();
12188 src1.read();
12189
12190 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12191 if (wf->execMask(lane)) {
12192 vcc.setBit(lane, (src0[lane] < src1[lane]
12193 || src0[lane] > src1[lane]) ? 1 : 0);
12194 }
12195 }
12196
12197 wf->execMask() = vcc.rawData();
12198 vcc.write();
12199 }
12200
12201 Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt)
12202 : Inst_VOPC(iFmt, "v_cmpx_ge_f64")
12203 {
12204 setFlag(ALU);
12205 setFlag(F64);
12206 } // Inst_VOPC__V_CMPX_GE_F64
12207
12208 Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64()
12209 {
12210 } // ~Inst_VOPC__V_CMPX_GE_F64
12211
12212 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
12213 void
12214 Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
12215 {
12216 Wavefront *wf = gpuDynInst->wavefront();
12217 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12218 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12219 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12220
12221 src0.readSrc();
12222 src1.read();
12223
12224 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12225 if (wf->execMask(lane)) {
12226 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
12227 }
12228 }
12229
12230 wf->execMask() = vcc.rawData();
12231 vcc.write();
12232 }
12233
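// v_cmpx_o (ordered) is true only when neither source is a NaN;
// v_cmpx_u (unordered), below, is its complement and is true when
// either source is a NaN.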
12234 Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt)
12235 : Inst_VOPC(iFmt, "v_cmpx_o_f64")
12236 {
12237 setFlag(ALU);
12238 setFlag(F64);
12239 } // Inst_VOPC__V_CMPX_O_F64
12240
12241 Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64()
12242 {
12243 } // ~Inst_VOPC__V_CMPX_O_F64
12244
12245 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
12246 // encoding.
12247 void
12248 Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
12249 {
12250 Wavefront *wf = gpuDynInst->wavefront();
12251 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12252 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12253 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12254
12255 src0.readSrc();
12256 src1.read();
12257
12258 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12259 if (wf->execMask(lane)) {
12260 vcc.setBit(lane, (!std::isnan(src0[lane])
12261 && !std::isnan(src1[lane])) ? 1 : 0);
12262 }
12263 }
12264
12265 wf->execMask() = vcc.rawData();
12266 vcc.write();
12267 }
12268
12269 Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt)
12270 : Inst_VOPC(iFmt, "v_cmpx_u_f64")
12271 {
12272 setFlag(ALU);
12273 setFlag(F64);
12274 } // Inst_VOPC__V_CMPX_U_F64
12275
12276 Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64()
12277 {
12278 } // ~Inst_VOPC__V_CMPX_U_F64
12279
12280 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
12281 // encoding.
12282 void
12283 Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
12284 {
12285 Wavefront *wf = gpuDynInst->wavefront();
12286 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12287 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12288 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12289
12290 src0.readSrc();
12291 src1.read();
12292
12293 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12294 if (wf->execMask(lane)) {
12295 vcc.setBit(lane, (std::isnan(src0[lane])
12296 || std::isnan(src1[lane])) ? 1 : 0);
12297 }
12298 }
12299
12300 wf->execMask() = vcc.rawData();
12301 vcc.write();
12302 }
12303
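// The negated float compares below (nge, nlg, ngt, nle, neq, nlt) all
// evaluate true when either source is a NaN: e.g. !(S0 >= S1) holds for
// unordered sources, whereas (S0 < S1) does not.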
12304 Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt)
12305 : Inst_VOPC(iFmt, "v_cmpx_nge_f64")
12306 {
12307 setFlag(ALU);
12308 setFlag(F64);
12309 } // Inst_VOPC__V_CMPX_NGE_F64
12310
12311 Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64()
12312 {
12313 } // ~Inst_VOPC__V_CMPX_NGE_F64
12314
12315 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
12316 void
12317 Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
12318 {
12319 Wavefront *wf = gpuDynInst->wavefront();
12320 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12321 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12322 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12323
12324 src0.readSrc();
12325 src1.read();
12326
12327 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12328 if (wf->execMask(lane)) {
12329 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
12330 }
12331 }
12332
12333 wf->execMask() = vcc.rawData();
12334 vcc.write();
12335 }
12336
12337 Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt)
12338 : Inst_VOPC(iFmt, "v_cmpx_nlg_f64")
12339 {
12340 setFlag(ALU);
12341 setFlag(F64);
12342 } // Inst_VOPC__V_CMPX_NLG_F64
12343
12344 Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64()
12345 {
12346 } // ~Inst_VOPC__V_CMPX_NLG_F64
12347
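// !(S0 < S1 || S0 > S1) holds when the sources compare equal or when
// either source is a NaN, i.e. the unordered "not less or greater"
// predicate.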
12348 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
12349 void
12350 Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
12351 {
12352 Wavefront *wf = gpuDynInst->wavefront();
12353 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12354 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12355 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12356
12357 src0.readSrc();
12358 src1.read();
12359
12360 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12361 if (wf->execMask(lane)) {
12362 vcc.setBit(lane, !(src0[lane] < src1[lane]
12363 || src0[lane] > src1[lane]) ? 1 : 0);
12364 }
12365 }
12366
12367 wf->execMask() = vcc.rawData();
12368 vcc.write();
12369 }
12370
12371 Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt)
12372 : Inst_VOPC(iFmt, "v_cmpx_ngt_f64")
12373 {
12374 setFlag(ALU);
12375 setFlag(F64);
12376 } // Inst_VOPC__V_CMPX_NGT_F64
12377
12378 Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64()
12379 {
12380 } // ~Inst_VOPC__V_CMPX_NGT_F64
12381
12382 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
12383 void
12384 Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
12385 {
12386 Wavefront *wf = gpuDynInst->wavefront();
12387 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12388 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12389 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12390
12391 src0.readSrc();
12392 src1.read();
12393
12394 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12395 if (wf->execMask(lane)) {
12396 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
12397 }
12398 }
12399
12400 wf->execMask() = vcc.rawData();
12401 vcc.write();
12402 }
12403
12404 Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt)
12405 : Inst_VOPC(iFmt, "v_cmpx_nle_f64")
12406 {
12407 setFlag(ALU);
12408 setFlag(F64);
12409 } // Inst_VOPC__V_CMPX_NLE_F64
12410
12411 Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64()
12412 {
12413 } // ~Inst_VOPC__V_CMPX_NLE_F64
12414
12415 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
12416 void
12417 Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
12418 {
12419 Wavefront *wf = gpuDynInst->wavefront();
12420 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12421 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12422 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12423
12424 src0.readSrc();
12425 src1.read();
12426
12427 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12428 if (wf->execMask(lane)) {
12429 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
12430 }
12431 }
12432
12433 wf->execMask() = vcc.rawData();
12434 vcc.write();
12435 }
12436
12437 Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt)
12438 : Inst_VOPC(iFmt, "v_cmpx_neq_f64")
12439 {
12440 setFlag(ALU);
12441 setFlag(F64);
12442 } // Inst_VOPC__V_CMPX_NEQ_F64
12443
12444 Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64()
12445 {
12446 } // ~Inst_VOPC__V_CMPX_NEQ_F64
12447
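// C++'s operator!= on doubles is already an unordered compare (it
// yields true when either operand is a NaN), so it implements the neq
// predicate directly.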
12448 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
12449 void
12450 Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
12451 {
12452 Wavefront *wf = gpuDynInst->wavefront();
12453 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12454 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12455 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12456
12457 src0.readSrc();
12458 src1.read();
12459
12460 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12461 if (wf->execMask(lane)) {
12462 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
12463 }
12464 }
12465
12466 wf->execMask() = vcc.rawData();
12467 vcc.write();
12468 }
12469
12470 Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt)
12471 : Inst_VOPC(iFmt, "v_cmpx_nlt_f64")
12472 {
12473 setFlag(ALU);
12474 setFlag(F64);
12475 } // Inst_VOPC__V_CMPX_NLT_F64
12476
12477 Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64()
12478 {
12479 } // ~Inst_VOPC__V_CMPX_NLT_F64
12480
12481 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
12482 void
12483 Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
12484 {
12485 Wavefront *wf = gpuDynInst->wavefront();
12486 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12487 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12488 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12489
12490 src0.readSrc();
12491 src1.read();
12492
12493 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12494 if (wf->execMask(lane)) {
12495 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
12496 }
12497 }
12498
12499 wf->execMask() = vcc.rawData();
12500 vcc.write();
12501 }
12502
12503 Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt)
12504 : Inst_VOPC(iFmt, "v_cmpx_tru_f64")
12505 {
12506 setFlag(ALU);
12507 setFlag(F64);
12508 } // Inst_VOPC__V_CMPX_TRU_F64
12509
12510 Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64()
12511 {
12512 } // ~Inst_VOPC__V_CMPX_TRU_F64
12513
12514 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
12515 void
12516 Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
12517 {
12518 Wavefront *wf = gpuDynInst->wavefront();
12519 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12520
12521 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12522 if (wf->execMask(lane)) {
12523 vcc.setBit(lane, 1);
12524 }
12525 }
12526
12527 wf->execMask() = vcc.rawData();
12528 vcc.write();
12529 }
12530
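// The integer compares below come in signed (i16/i32) and unsigned
// (u16/u32) flavors; the f and t (tru) opcodes are constant false/true
// predicates that simply clear or set the active lanes' result bits.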
12531 Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt)
12532 : Inst_VOPC(iFmt, "v_cmp_f_i16")
12533 {
12534 setFlag(ALU);
12535 } // Inst_VOPC__V_CMP_F_I16
12536
12537 Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16()
12538 {
12539 } // ~Inst_VOPC__V_CMP_F_I16
12540
12541 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
12542 void
12543 Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
12544 {
12545 Wavefront *wf = gpuDynInst->wavefront();
12546 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12547
12548 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12549 if (wf->execMask(lane)) {
12550 vcc.setBit(lane, 0);
12551 }
12552 }
12553
12554 vcc.write();
12555 }
12556
12557 Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt)
12558 : Inst_VOPC(iFmt, "v_cmp_lt_i16")
12559 {
12560 setFlag(ALU);
12561 } // Inst_VOPC__V_CMP_LT_I16
12562
12563 Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16()
12564 {
12565 } // ~Inst_VOPC__V_CMP_LT_I16
12566
12567 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
12568 void
12569 Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
12570 {
12571 Wavefront *wf = gpuDynInst->wavefront();
12572 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12573 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12574 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12575
12576 src0.readSrc();
12577 src1.read();
12578
12579 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12580 if (wf->execMask(lane)) {
12581 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
12582 }
12583 }
12584
12585 vcc.write();
12586 }
12587
12588 Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt)
12589 : Inst_VOPC(iFmt, "v_cmp_eq_i16")
12590 {
12591 setFlag(ALU);
12592 } // Inst_VOPC__V_CMP_EQ_I16
12593
12594 Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16()
12595 {
12596 } // ~Inst_VOPC__V_CMP_EQ_I16
12597
12598 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
12599 void
12600 Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
12601 {
12602 Wavefront *wf = gpuDynInst->wavefront();
12603 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12604 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12605 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12606
12607 src0.readSrc();
12608 src1.read();
12609
12610 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12611 if (wf->execMask(lane)) {
12612 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
12613 }
12614 }
12615
12616 vcc.write();
12617 }
12618
12619 Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt)
12620 : Inst_VOPC(iFmt, "v_cmp_le_i16")
12621 {
12622 setFlag(ALU);
12623 } // Inst_VOPC__V_CMP_LE_I16
12624
12625 Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16()
12626 {
12627 } // ~Inst_VOPC__V_CMP_LE_I16
12628
12629 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
12630 void
12631 Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
12632 {
12633 Wavefront *wf = gpuDynInst->wavefront();
12634 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12635 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12636 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12637
12638 src0.readSrc();
12639 src1.read();
12640
12641 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12642 if (wf->execMask(lane)) {
12643 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
12644 }
12645 }
12646
12647 vcc.write();
12648 }
12649
12650 Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt)
12651 : Inst_VOPC(iFmt, "v_cmp_gt_i16")
12652 {
12653 setFlag(ALU);
12654 } // Inst_VOPC__V_CMP_GT_I16
12655
12656 Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16()
12657 {
12658 } // ~Inst_VOPC__V_CMP_GT_I16
12659
12660 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
12661 void
12662 Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
12663 {
12664 Wavefront *wf = gpuDynInst->wavefront();
12665 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12666 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12667 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12668
12669 src0.readSrc();
12670 src1.read();
12671
12672 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12673 if (wf->execMask(lane)) {
12674 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
12675 }
12676 }
12677
12678 vcc.write();
12679 }
12680
12681 Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt)
12682 : Inst_VOPC(iFmt, "v_cmp_ne_i16")
12683 {
12684 setFlag(ALU);
12685 } // Inst_VOPC__V_CMP_NE_I16
12686
12687 Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16()
12688 {
12689 } // ~Inst_VOPC__V_CMP_NE_I16
12690
12691 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
12692 void
12693 Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
12694 {
12695 Wavefront *wf = gpuDynInst->wavefront();
12696 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12697 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12698 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12699
12700 src0.readSrc();
12701 src1.read();
12702
12703 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12704 if (wf->execMask(lane)) {
12705 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
12706 }
12707 }
12708
12709 vcc.write();
12710 }
12711
12712 Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt)
12713 : Inst_VOPC(iFmt, "v_cmp_ge_i16")
12714 {
12715 setFlag(ALU);
12716 } // Inst_VOPC__V_CMP_GE_I16
12717
12718 Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16()
12719 {
12720 } // ~Inst_VOPC__V_CMP_GE_I16
12721
12722 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
12723 void
12724 Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
12725 {
12726 Wavefront *wf = gpuDynInst->wavefront();
12727 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12728 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12729 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12730
12731 src0.readSrc();
12732 src1.read();
12733
12734 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12735 if (wf->execMask(lane)) {
12736 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
12737 }
12738 }
12739
12740 vcc.write();
12741 }
12742
12743 Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt)
12744 : Inst_VOPC(iFmt, "v_cmp_t_i16")
12745 {
12746 setFlag(ALU);
12747 } // Inst_VOPC__V_CMP_T_I16
12748
12749 Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16()
12750 {
12751 } // ~Inst_VOPC__V_CMP_T_I16
12752
12753 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
12754 void
12755 Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
12756 {
12757 Wavefront *wf = gpuDynInst->wavefront();
12758 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12759
12760 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12761 if (wf->execMask(lane)) {
12762 vcc.setBit(lane, 1);
12763 }
12764 }
12765
12766 vcc.write();
12767 }
12768
12769 Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt)
12770 : Inst_VOPC(iFmt, "v_cmp_f_u16")
12771 {
12772 setFlag(ALU);
12773 } // Inst_VOPC__V_CMP_F_U16
12774
12775 Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16()
12776 {
12777 } // ~Inst_VOPC__V_CMP_F_U16
12778
12779 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
12780 void
12781 Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
12782 {
12783 Wavefront *wf = gpuDynInst->wavefront();
12784 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12785
12786 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12787 if (wf->execMask(lane)) {
12788 vcc.setBit(lane, 0);
12789 }
12790 }
12791
12792 vcc.write();
12793 }
12794
12795 Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt)
12796 : Inst_VOPC(iFmt, "v_cmp_lt_u16")
12797 {
12798 setFlag(ALU);
12799 } // Inst_VOPC__V_CMP_LT_U16
12800
12801 Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16()
12802 {
12803 } // ~Inst_VOPC__V_CMP_LT_U16
12804
12805 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
12806 void
12807 Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
12808 {
12809 Wavefront *wf = gpuDynInst->wavefront();
12810 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12811 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12812 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12813
12814 src0.readSrc();
12815 src1.read();
12816
12817 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12818 if (wf->execMask(lane)) {
12819 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
12820 }
12821 }
12822
12823 vcc.write();
12824 }
12825
12826 Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt)
12827 : Inst_VOPC(iFmt, "v_cmp_eq_u16")
12828 {
12829 setFlag(ALU);
12830 } // Inst_VOPC__V_CMP_EQ_U16
12831
12832 Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16()
12833 {
12834 } // ~Inst_VOPC__V_CMP_EQ_U16
12835
12836 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
12837 void
12838 Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
12839 {
12840 Wavefront *wf = gpuDynInst->wavefront();
12841 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12842 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12843 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12844
12845 src0.readSrc();
12846 src1.read();
12847
12848 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12849 if (wf->execMask(lane)) {
12850 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
12851 }
12852 }
12853
12854 vcc.write();
12855 }
12856
12857 Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt)
12858 : Inst_VOPC(iFmt, "v_cmp_le_u16")
12859 {
12860 setFlag(ALU);
12861 } // Inst_VOPC__V_CMP_LE_U16
12862
12863 Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16()
12864 {
12865 } // ~Inst_VOPC__V_CMP_LE_U16
12866
12867 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
12868 void
12869 Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
12870 {
12871 Wavefront *wf = gpuDynInst->wavefront();
12872 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12873 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12874 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12875
12876 src0.readSrc();
12877 src1.read();
12878
12879 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12880 if (wf->execMask(lane)) {
12881 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
12882 }
12883 }
12884
12885 vcc.write();
12886 }
12887
12888 Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt)
12889 : Inst_VOPC(iFmt, "v_cmp_gt_u16")
12890 {
12891 setFlag(ALU);
12892 } // Inst_VOPC__V_CMP_GT_U16
12893
12894 Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16()
12895 {
12896 } // ~Inst_VOPC__V_CMP_GT_U16
12897
12898 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
12899 void
12900 Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
12901 {
12902 Wavefront *wf = gpuDynInst->wavefront();
12903 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12904 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12905 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12906
12907 src0.readSrc();
12908 src1.read();
12909
12910 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12911 if (wf->execMask(lane)) {
12912 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
12913 }
12914 }
12915
12916 vcc.write();
12917 }
12918
12919 Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt)
12920 : Inst_VOPC(iFmt, "v_cmp_ne_u16")
12921 {
12922 setFlag(ALU);
12923 } // Inst_VOPC__V_CMP_NE_U16
12924
12925 Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16()
12926 {
12927 } // ~Inst_VOPC__V_CMP_NE_U16
12928
12929 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
12930 void
12931 Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
12932 {
12933 Wavefront *wf = gpuDynInst->wavefront();
12934 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12935 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12936 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12937
12938 src0.readSrc();
12939 src1.read();
12940
12941 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12942 if (wf->execMask(lane)) {
12943 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
12944 }
12945 }
12946
12947 vcc.write();
12948 }
12949
12950 Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt)
12951 : Inst_VOPC(iFmt, "v_cmp_ge_u16")
12952 {
12953 setFlag(ALU);
12954 } // Inst_VOPC__V_CMP_GE_U16
12955
12956 Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16()
12957 {
12958 } // ~Inst_VOPC__V_CMP_GE_U16
12959
12960 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
12961 void
12962 Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
12963 {
12964 Wavefront *wf = gpuDynInst->wavefront();
12965 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12966 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12967 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12968
12969 src0.readSrc();
12970 src1.read();
12971
12972 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12973 if (wf->execMask(lane)) {
12974 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
12975 }
12976 }
12977
12978 vcc.write();
12979 }
12980
12981 Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt)
12982 : Inst_VOPC(iFmt, "v_cmp_t_u16")
12983 {
12984 setFlag(ALU);
12985 } // Inst_VOPC__V_CMP_T_U16
12986
12987 Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16()
12988 {
12989 } // ~Inst_VOPC__V_CMP_T_U16
12990
12991 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
12992 void
12993 Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
12994 {
12995 Wavefront *wf = gpuDynInst->wavefront();
12996 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12997
12998 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12999 if (wf->execMask(lane)) {
13000 vcc.setBit(lane, 1);
13001 }
13002 }
13003
13004 vcc.write();
13005 }
13006
13007 Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt)
13008 : Inst_VOPC(iFmt, "v_cmpx_f_i16")
13009 {
13010 setFlag(ALU);
13011 } // Inst_VOPC__V_CMPX_F_I16
13012
13013 Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16()
13014 {
13015 } // ~Inst_VOPC__V_CMPX_F_I16
13016
13017 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
13018 void
13019 Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
13020 {
13021 Wavefront *wf = gpuDynInst->wavefront();
13022 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13023
13024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13025 if (wf->execMask(lane)) {
13026 vcc.setBit(lane, 0);
13027 }
13028 }
13029
13030 wf->execMask() = vcc.rawData();
13031 vcc.write();
13032 }
13033
13034 Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt)
13035 : Inst_VOPC(iFmt, "v_cmpx_lt_i16")
13036 {
13037 setFlag(ALU);
13038 } // Inst_VOPC__V_CMPX_LT_I16
13039
13040 Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16()
13041 {
13042 } // ~Inst_VOPC__V_CMPX_LT_I16
13043
13044 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13045 void
13046 Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
13047 {
13048 Wavefront *wf = gpuDynInst->wavefront();
13049 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13050 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13051 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13052
13053 src0.readSrc();
13054 src1.read();
13055
13056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13057 if (wf->execMask(lane)) {
13058 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13059 }
13060 }
13061
13062 wf->execMask() = vcc.rawData();
13063 vcc.write();
13064 }
13065
13066 Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt)
13067 : Inst_VOPC(iFmt, "v_cmpx_eq_i16")
13068 {
13069 setFlag(ALU);
13070 } // Inst_VOPC__V_CMPX_EQ_I16
13071
13072 Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16()
13073 {
13074 } // ~Inst_VOPC__V_CMPX_EQ_I16
13075
13076 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13077 void
13078 Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
13079 {
13080 Wavefront *wf = gpuDynInst->wavefront();
13081 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13082 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13083 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13084
13085 src0.readSrc();
13086 src1.read();
13087
13088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13089 if (wf->execMask(lane)) {
13090 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13091 }
13092 }
13093
13094 wf->execMask() = vcc.rawData();
13095 vcc.write();
13096 }
13097
13098 Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt)
13099 : Inst_VOPC(iFmt, "v_cmpx_le_i16")
13100 {
13101 setFlag(ALU);
13102 } // Inst_VOPC__V_CMPX_LE_I16
13103
13104 Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16()
13105 {
13106 } // ~Inst_VOPC__V_CMPX_LE_I16
13107
13108 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13109 void
13110 Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
13111 {
13112 Wavefront *wf = gpuDynInst->wavefront();
13113 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13114 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13115 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13116
13117 src0.readSrc();
13118 src1.read();
13119
13120 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13121 if (wf->execMask(lane)) {
13122 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13123 }
13124 }
13125
13126 wf->execMask() = vcc.rawData();
13127 vcc.write();
13128 }
13129
13130 Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt)
13131 : Inst_VOPC(iFmt, "v_cmpx_gt_i16")
13132 {
13133 setFlag(ALU);
13134 } // Inst_VOPC__V_CMPX_GT_I16
13135
13136 Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16()
13137 {
13138 } // ~Inst_VOPC__V_CMPX_GT_I16
13139
13140 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13141 void
13142 Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
13143 {
13144 Wavefront *wf = gpuDynInst->wavefront();
13145 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13146 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13147 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13148
13149 src0.readSrc();
13150 src1.read();
13151
13152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13153 if (wf->execMask(lane)) {
13154 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13155 }
13156 }
13157
13158 wf->execMask() = vcc.rawData();
13159 vcc.write();
13160 }
13161
13162 Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt)
13163 : Inst_VOPC(iFmt, "v_cmpx_ne_i16")
13164 {
13165 setFlag(ALU);
13166 } // Inst_VOPC__V_CMPX_NE_I16
13167
13168 Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16()
13169 {
13170 } // ~Inst_VOPC__V_CMPX_NE_I16
13171
13172 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13173 void
13174 Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
13175 {
13176 Wavefront *wf = gpuDynInst->wavefront();
13177 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13178 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13179 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13180
13181 src0.readSrc();
13182 src1.read();
13183
13184 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13185 if (wf->execMask(lane)) {
13186 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13187 }
13188 }
13189
13190 wf->execMask() = vcc.rawData();
13191 vcc.write();
13192 }
13193
13194 Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt)
13195 : Inst_VOPC(iFmt, "v_cmpx_ge_i16")
13196 {
13197 setFlag(ALU);
13198 } // Inst_VOPC__V_CMPX_GE_I16
13199
13200 Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16()
13201 {
13202 } // ~Inst_VOPC__V_CMPX_GE_I16
13203
13204 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13205 void
13206 Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
13207 {
13208 Wavefront *wf = gpuDynInst->wavefront();
13209 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13210 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13211 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13212
13213 src0.readSrc();
13214 src1.read();
13215
13216 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13217 if (wf->execMask(lane)) {
13218 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13219 }
13220 }
13221
13222 wf->execMask() = vcc.rawData();
13223 vcc.write();
13224 }
13225
13226 Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt)
13227 : Inst_VOPC(iFmt, "v_cmpx_t_i16")
13228 {
13229 setFlag(ALU);
13230 } // Inst_VOPC__V_CMPX_T_I16
13231
13232 Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16()
13233 {
13234 } // ~Inst_VOPC__V_CMPX_T_I16
13235
13236 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
13237 void
13238 Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
13239 {
13240 Wavefront *wf = gpuDynInst->wavefront();
13241 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13242
13243 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13244 if (wf->execMask(lane)) {
13245 vcc.setBit(lane, 1);
13246 }
13247 }
13248
13249 wf->execMask() = vcc.rawData();
13250 vcc.write();
13251 }
13252
13253 Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt)
13254 : Inst_VOPC(iFmt, "v_cmpx_f_u16")
13255 {
13256 setFlag(ALU);
13257 } // Inst_VOPC__V_CMPX_F_U16
13258
13259 Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16()
13260 {
13261 } // ~Inst_VOPC__V_CMPX_F_U16
13262
13263 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
13264 void
13265 Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
13266 {
13267 Wavefront *wf = gpuDynInst->wavefront();
13268 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13269
13270 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13271 if (wf->execMask(lane)) {
13272 vcc.setBit(lane, 0);
13273 }
13274 }
13275
13276 wf->execMask() = vcc.rawData();
13277 vcc.write();
13278 }
13279
13280 Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt)
13281 : Inst_VOPC(iFmt, "v_cmpx_lt_u16")
13282 {
13283 setFlag(ALU);
13284 } // Inst_VOPC__V_CMPX_LT_U16
13285
13286 Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16()
13287 {
13288 } // ~Inst_VOPC__V_CMPX_LT_U16
13289
13290 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13291 void
13292 Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
13293 {
13294 Wavefront *wf = gpuDynInst->wavefront();
13295 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13296 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13297 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13298
13299 src0.readSrc();
13300 src1.read();
13301
13302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13303 if (wf->execMask(lane)) {
13304 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13305 }
13306 }
13307
13308 wf->execMask() = vcc.rawData();
13309 vcc.write();
13310 }
13311
13312 Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt)
13313 : Inst_VOPC(iFmt, "v_cmpx_eq_u16")
13314 {
13315 setFlag(ALU);
13316 } // Inst_VOPC__V_CMPX_EQ_U16
13317
13318 Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16()
13319 {
13320 } // ~Inst_VOPC__V_CMPX_EQ_U16
13321
13322 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13323 void
13324 Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
13325 {
13326 Wavefront *wf = gpuDynInst->wavefront();
13327 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13328 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13329 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13330
13331 src0.readSrc();
13332 src1.read();
13333
13334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13335 if (wf->execMask(lane)) {
13336 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13337 }
13338 }
13339
13340 wf->execMask() = vcc.rawData();
13341 vcc.write();
13342 }
13343
13344 Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt)
13345 : Inst_VOPC(iFmt, "v_cmpx_le_u16")
13346 {
13347 setFlag(ALU);
13348 } // Inst_VOPC__V_CMPX_LE_U16
13349
13350 Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16()
13351 {
13352 } // ~Inst_VOPC__V_CMPX_LE_U16
13353
13354 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13355 void
13356 Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
13357 {
13358 Wavefront *wf = gpuDynInst->wavefront();
13359 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13360 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13361 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13362
13363 src0.readSrc();
13364 src1.read();
13365
13366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13367 if (wf->execMask(lane)) {
13368 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13369 }
13370 }
13371
13372 wf->execMask() = vcc.rawData();
13373 vcc.write();
13374 }
13375
13376 Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt)
13377 : Inst_VOPC(iFmt, "v_cmpx_gt_u16")
13378 {
13379 setFlag(ALU);
13380 } // Inst_VOPC__V_CMPX_GT_U16
13381
13382 Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16()
13383 {
13384 } // ~Inst_VOPC__V_CMPX_GT_U16
13385
13386 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13387 void
13388 Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
13389 {
13390 Wavefront *wf = gpuDynInst->wavefront();
13391 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13392 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13394
13395 src0.readSrc();
13396 src1.read();
13397
13398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13399 if (wf->execMask(lane)) {
13400 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13401 }
13402 }
13403
13404 wf->execMask() = vcc.rawData();
13405 vcc.write();
13406 }
13407
13408 Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt)
13409 : Inst_VOPC(iFmt, "v_cmpx_ne_u16")
13410 {
13411 setFlag(ALU);
13412 } // Inst_VOPC__V_CMPX_NE_U16
13413
13414 Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16()
13415 {
13416 } // ~Inst_VOPC__V_CMPX_NE_U16
13417
13418 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13419 void
13420 Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
13421 {
13422 Wavefront *wf = gpuDynInst->wavefront();
13423 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13424 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13425 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13426
13427 src0.readSrc();
13428 src1.read();
13429
13430 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13431 if (wf->execMask(lane)) {
13432 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13433 }
13434 }
13435
13436 wf->execMask() = vcc.rawData();
13437 vcc.write();
13438 }
13439
13440 Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt)
13441 : Inst_VOPC(iFmt, "v_cmpx_ge_u16")
13442 {
13443 setFlag(ALU);
13444 } // Inst_VOPC__V_CMPX_GE_U16
13445
13446 Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16()
13447 {
13448 } // ~Inst_VOPC__V_CMPX_GE_U16
13449
13450 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13451 void
13452 Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
13453 {
13454 Wavefront *wf = gpuDynInst->wavefront();
13455 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13456 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13457 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13458
13459 src0.readSrc();
13460 src1.read();
13461
13462 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13463 if (wf->execMask(lane)) {
13464 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13465 }
13466 }
13467
13468 wf->execMask() = vcc.rawData();
13469 vcc.write();
13470 }
13471
13472 Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt)
13473 : Inst_VOPC(iFmt, "v_cmpx_t_u16")
13474 {
13475 setFlag(ALU);
13476 } // Inst_VOPC__V_CMPX_T_U16
13477
13478 Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16()
13479 {
13480 } // ~Inst_VOPC__V_CMPX_T_U16
13481
13482 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
13483 void
13484 Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
13485 {
13486 Wavefront *wf = gpuDynInst->wavefront();
13487 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13488
13489 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13490 if (wf->execMask(lane)) {
13491 vcc.setBit(lane, 1);
13492 }
13493 }
13494
13495 wf->execMask() = vcc.rawData();
13496 vcc.write();
13497 }
13498
13499 Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt)
13500 : Inst_VOPC(iFmt, "v_cmp_f_i32")
13501 {
13502 setFlag(ALU);
13503 } // Inst_VOPC__V_CMP_F_I32
13504
13505 Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32()
13506 {
13507 } // ~Inst_VOPC__V_CMP_F_I32
13508
13509 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
13510 void
13511 Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
13512 {
13513 Wavefront *wf = gpuDynInst->wavefront();
13514 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13515
13516 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13517 if (wf->execMask(lane)) {
13518 vcc.setBit(lane, 0);
13519 }
13520 }
13521
13522 vcc.write();
13523 }
13524
13525 Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt)
13526 : Inst_VOPC(iFmt, "v_cmp_lt_i32")
13527 {
13528 setFlag(ALU);
13529 } // Inst_VOPC__V_CMP_LT_I32
13530
13531 Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32()
13532 {
13533 } // ~Inst_VOPC__V_CMP_LT_I32
13534
13535 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13536 void
13537 Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
13538 {
13539 Wavefront *wf = gpuDynInst->wavefront();
13540 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13541 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13542 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13543
13544 src0.readSrc();
13545 src1.read();
13546
13547 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13548 if (wf->execMask(lane)) {
13549 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13550 }
13551 }
13552
13553 vcc.write();
13554 }
13555
13556 Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt)
13557 : Inst_VOPC(iFmt, "v_cmp_eq_i32")
13558 {
13559 setFlag(ALU);
13560 } // Inst_VOPC__V_CMP_EQ_I32
13561
13562 Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32()
13563 {
13564 } // ~Inst_VOPC__V_CMP_EQ_I32
13565
13566 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13567 void
13568 Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
13569 {
13570 Wavefront *wf = gpuDynInst->wavefront();
13571 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13572 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13573 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13574
13575 src0.readSrc();
13576 src1.read();
13577
13578 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13579 if (wf->execMask(lane)) {
13580 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13581 }
13582 }
13583
13584 vcc.write();
13585 }
13586
13587 Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt)
13588 : Inst_VOPC(iFmt, "v_cmp_le_i32")
13589 {
13590 setFlag(ALU);
13591 } // Inst_VOPC__V_CMP_LE_I32
13592
13593 Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32()
13594 {
13595 } // ~Inst_VOPC__V_CMP_LE_I32
13596
13597 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13598 void
13599 Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
13600 {
13601 Wavefront *wf = gpuDynInst->wavefront();
13602 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13603 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13604 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13605
13606 src0.readSrc();
13607 src1.read();
13608
13609 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13610 if (wf->execMask(lane)) {
13611 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13612 }
13613 }
13614
13615 vcc.write();
13616 }
13617
13618 Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt)
13619 : Inst_VOPC(iFmt, "v_cmp_gt_i32")
13620 {
13621 setFlag(ALU);
13622 } // Inst_VOPC__V_CMP_GT_I32
13623
13624 Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32()
13625 {
13626 } // ~Inst_VOPC__V_CMP_GT_I32
13627
13628 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13629 void
13630 Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
13631 {
13632 Wavefront *wf = gpuDynInst->wavefront();
13633 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13634 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13635 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13636
13637 src0.readSrc();
13638 src1.read();
13639
13640 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13641 if (wf->execMask(lane)) {
13642 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13643 }
13644 }
13645
13646 vcc.write();
13647 }
13648
13649 Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt)
13650 : Inst_VOPC(iFmt, "v_cmp_ne_i32")
13651 {
13652 setFlag(ALU);
13653 } // Inst_VOPC__V_CMP_NE_I32
13654
13655 Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32()
13656 {
13657 } // ~Inst_VOPC__V_CMP_NE_I32
13658
13659 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13660 void
13661 Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
13662 {
13663 Wavefront *wf = gpuDynInst->wavefront();
13664 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13665 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13666 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13667
13668 src0.readSrc();
13669 src1.read();
13670
13671 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13672 if (wf->execMask(lane)) {
13673 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13674 }
13675 }
13676
13677 vcc.write();
13678 }
13679
13680 Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt)
13681 : Inst_VOPC(iFmt, "v_cmp_ge_i32")
13682 {
13683 setFlag(ALU);
13684 } // Inst_VOPC__V_CMP_GE_I32
13685
13686 Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32()
13687 {
13688 } // ~Inst_VOPC__V_CMP_GE_I32
13689
13690 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13691 void
13692 Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
13693 {
13694 Wavefront *wf = gpuDynInst->wavefront();
13695 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13696 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13697 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13698
13699 src0.readSrc();
13700 src1.read();
13701
13702 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13703 if (wf->execMask(lane)) {
13704 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13705 }
13706 }
13707
13708 vcc.write();
13709 }
13710
13711 Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt)
13712 : Inst_VOPC(iFmt, "v_cmp_t_i32")
13713 {
13714 setFlag(ALU);
13715 } // Inst_VOPC__V_CMP_T_I32
13716
13717 Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32()
13718 {
13719 } // ~Inst_VOPC__V_CMP_T_I32
13720
13721 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
13722 void
13723 Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
13724 {
13725 Wavefront *wf = gpuDynInst->wavefront();
13726 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13727
13728 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13729 if (wf->execMask(lane)) {
13730 vcc.setBit(lane, 1);
13731 }
13732 }
13733
13734 vcc.write();
13735 }
13736
13737 Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt)
13738 : Inst_VOPC(iFmt, "v_cmp_f_u32")
13739 {
13740 setFlag(ALU);
13741 } // Inst_VOPC__V_CMP_F_U32
13742
13743 Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32()
13744 {
13745 } // ~Inst_VOPC__V_CMP_F_U32
13746
13747 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
13748 void
13749 Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
13750 {
13751 Wavefront *wf = gpuDynInst->wavefront();
13752 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13753
13754 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13755 if (wf->execMask(lane)) {
13756 vcc.setBit(lane, 0);
13757 }
13758 }
13759
13760 vcc.write();
13761 }
13762
13763 Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt)
13764 : Inst_VOPC(iFmt, "v_cmp_lt_u32")
13765 {
13766 setFlag(ALU);
13767 } // Inst_VOPC__V_CMP_LT_U32
13768
13769 Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32()
13770 {
13771 } // ~Inst_VOPC__V_CMP_LT_U32
13772
13773 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13774 void
13775 Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
13776 {
13777 Wavefront *wf = gpuDynInst->wavefront();
13778 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13779 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13780 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13781
13782 src0.readSrc();
13783 src1.read();
13784
13785 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13786 if (wf->execMask(lane)) {
13787 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13788 }
13789 }
13790
13791 vcc.write();
13792 }
13793
13794 Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt)
13795 : Inst_VOPC(iFmt, "v_cmp_eq_u32")
13796 {
13797 setFlag(ALU);
13798 } // Inst_VOPC__V_CMP_EQ_U32
13799
13800 Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32()
13801 {
13802 } // ~Inst_VOPC__V_CMP_EQ_U32
13803
13804 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13805 void
13806 Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
13807 {
13808 Wavefront *wf = gpuDynInst->wavefront();
13809 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13810 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13811 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13812
13813 src0.readSrc();
13814 src1.read();
13815
13816 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13817 if (wf->execMask(lane)) {
13818 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13819 }
13820 }
13821
13822 vcc.write();
13823 }
13824
13825 Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt)
13826 : Inst_VOPC(iFmt, "v_cmp_le_u32")
13827 {
13828 setFlag(ALU);
13829 } // Inst_VOPC__V_CMP_LE_U32
13830
13831 Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32()
13832 {
13833 } // ~Inst_VOPC__V_CMP_LE_U32
13834
13835 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13836 void
13837 Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
13838 {
13839 Wavefront *wf = gpuDynInst->wavefront();
13840 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13841 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13842 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13843
13844 src0.readSrc();
13845 src1.read();
13846
13847 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13848 if (wf->execMask(lane)) {
13849 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13850 }
13851 }
13852
13853 vcc.write();
13854 }
13855
13856 Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt)
13857 : Inst_VOPC(iFmt, "v_cmp_gt_u32")
13858 {
13859 setFlag(ALU);
13860 } // Inst_VOPC__V_CMP_GT_U32
13861
13862 Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32()
13863 {
13864 } // ~Inst_VOPC__V_CMP_GT_U32
13865
13866 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13867 void
13868 Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
13869 {
13870 Wavefront *wf = gpuDynInst->wavefront();
13871 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13872 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13873 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13874
13875 src0.readSrc();
13876 src1.read();
13877
13878 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13879 if (wf->execMask(lane)) {
13880 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13881 }
13882 }
13883
13884 vcc.write();
13885 }
13886
13887 Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt)
13888 : Inst_VOPC(iFmt, "v_cmp_ne_u32")
13889 {
13890 setFlag(ALU);
13891 } // Inst_VOPC__V_CMP_NE_U32
13892
13893 Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32()
13894 {
13895 } // ~Inst_VOPC__V_CMP_NE_U32
13896
13897 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13898 void
13899 Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
13900 {
13901 Wavefront *wf = gpuDynInst->wavefront();
13902 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13903 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13904 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13905
13906 src0.readSrc();
13907 src1.read();
13908
13909 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13910 if (wf->execMask(lane)) {
13911 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13912 }
13913 }
13914
13915 vcc.write();
13916 }
13917
13918 Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt)
13919 : Inst_VOPC(iFmt, "v_cmp_ge_u32")
13920 {
13921 setFlag(ALU);
13922 } // Inst_VOPC__V_CMP_GE_U32
13923
13924 Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32()
13925 {
13926 } // ~Inst_VOPC__V_CMP_GE_U32
13927
13928 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13929 void
13930 Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
13931 {
13932 Wavefront *wf = gpuDynInst->wavefront();
13933 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13934 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13935 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13936
13937 src0.readSrc();
13938 src1.read();
13939
13940 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13941 if (wf->execMask(lane)) {
13942 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13943 }
13944 }
13945
13946 vcc.write();
13947 }
13948
13949 Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt)
13950 : Inst_VOPC(iFmt, "v_cmp_t_u32")
13951 {
13952 setFlag(ALU);
13953 } // Inst_VOPC__V_CMP_T_U32
13954
13955 Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32()
13956 {
13957 } // ~Inst_VOPC__V_CMP_T_U32
13958
13959 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
13960 void
13961 Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
13962 {
13963 Wavefront *wf = gpuDynInst->wavefront();
13964 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13965
13966 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13967 if (wf->execMask(lane)) {
13968 vcc.setBit(lane, 1);
13969 }
13970 }
13971
13972 vcc.write();
13973 }
13974
13975 Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt)
13976 : Inst_VOPC(iFmt, "v_cmpx_f_i32")
13977 {
13978 setFlag(ALU);
13979 } // Inst_VOPC__V_CMPX_F_I32
13980
13981 Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32()
13982 {
13983 } // ~Inst_VOPC__V_CMPX_F_I32
13984
13985 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
13986 void
13987 Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
13988 {
13989 Wavefront *wf = gpuDynInst->wavefront();
13990 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13991
13992 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13993 if (wf->execMask(lane)) {
13994 vcc.setBit(lane, 0);
13995 }
13996 }
13997
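            // the CMPX variants additionally commit the result mask to EXEC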
13998 wf->execMask() = vcc.rawData();
13999 vcc.write();
14000 }
14001
14002 Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt)
14003 : Inst_VOPC(iFmt, "v_cmpx_lt_i32")
14004 {
14005 setFlag(ALU);
14006 } // Inst_VOPC__V_CMPX_LT_I32
14007
14008 Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32()
14009 {
14010 } // ~Inst_VOPC__V_CMPX_LT_I32
14011
14012 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14013 void
14014 Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
14015 {
14016 Wavefront *wf = gpuDynInst->wavefront();
14017 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14018 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14019 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14020
14021 src0.readSrc();
14022 src1.read();
14023
14024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14025 if (wf->execMask(lane)) {
14026 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14027 }
14028 }
14029
14030 wf->execMask() = vcc.rawData();
14031 vcc.write();
14032 }
14033
14034 Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt)
14035 : Inst_VOPC(iFmt, "v_cmpx_eq_i32")
14036 {
14037 setFlag(ALU);
14038 } // Inst_VOPC__V_CMPX_EQ_I32
14039
14040 Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32()
14041 {
14042 } // ~Inst_VOPC__V_CMPX_EQ_I32
14043
14044 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14045 void
14046 Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
14047 {
14048 Wavefront *wf = gpuDynInst->wavefront();
14049 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14050 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14051 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14052
14053 src0.readSrc();
14054 src1.read();
14055
14056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14057 if (wf->execMask(lane)) {
14058 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14059 }
14060 }
14061
14062 wf->execMask() = vcc.rawData();
14063 vcc.write();
14064 }
14065
14066 Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt)
14067 : Inst_VOPC(iFmt, "v_cmpx_le_i32")
14068 {
14069 setFlag(ALU);
14070 } // Inst_VOPC__V_CMPX_LE_I32
14071
14072 Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32()
14073 {
14074 } // ~Inst_VOPC__V_CMPX_LE_I32
14075
14076 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14077 void
14078 Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
14079 {
14080 Wavefront *wf = gpuDynInst->wavefront();
14081 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14082 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14083 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14084
14085 src0.readSrc();
14086 src1.read();
14087
14088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14089 if (wf->execMask(lane)) {
14090 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14091 }
14092 }
14093
14094 wf->execMask() = vcc.rawData();
14095 vcc.write();
14096 }
14097
14098 Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt)
14099 : Inst_VOPC(iFmt, "v_cmpx_gt_i32")
14100 {
14101 setFlag(ALU);
14102 } // Inst_VOPC__V_CMPX_GT_I32
14103
14104 Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32()
14105 {
14106 } // ~Inst_VOPC__V_CMPX_GT_I32
14107
14108 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14109 void
14110 Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
14111 {
14112 Wavefront *wf = gpuDynInst->wavefront();
14113 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14114 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14115 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14116
14117 src0.readSrc();
14118 src1.read();
14119
14120 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14121 if (wf->execMask(lane)) {
14122 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14123 }
14124 }
14125
14126 wf->execMask() = vcc.rawData();
14127 vcc.write();
14128 }
14129
14130 Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt)
14131 : Inst_VOPC(iFmt, "v_cmpx_ne_i32")
14132 {
14133 setFlag(ALU);
14134 } // Inst_VOPC__V_CMPX_NE_I32
14135
14136 Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32()
14137 {
14138 } // ~Inst_VOPC__V_CMPX_NE_I32
14139
14140         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14141 void
14142 Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
14143 {
14144 Wavefront *wf = gpuDynInst->wavefront();
14145 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14146 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14147 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14148
14149 src0.readSrc();
14150 src1.read();
14151
14152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14153 if (wf->execMask(lane)) {
14154 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14155 }
14156 }
14157
14158 wf->execMask() = vcc.rawData();
14159 vcc.write();
14160 }
14161
14162 Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt)
14163 : Inst_VOPC(iFmt, "v_cmpx_ge_i32")
14164 {
14165 setFlag(ALU);
14166 } // Inst_VOPC__V_CMPX_GE_I32
14167
14168 Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32()
14169 {
14170 } // ~Inst_VOPC__V_CMPX_GE_I32
14171
14172 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14173 void
14174 Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
14175 {
14176 Wavefront *wf = gpuDynInst->wavefront();
14177 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14178 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14179 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14180
14181 src0.readSrc();
14182 src1.read();
14183
14184 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14185 if (wf->execMask(lane)) {
14186 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14187 }
14188 }
14189
14190 wf->execMask() = vcc.rawData();
14191 vcc.write();
14192 }
14193
14194 Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt)
14195 : Inst_VOPC(iFmt, "v_cmpx_t_i32")
14196 {
14197 setFlag(ALU);
14198 } // Inst_VOPC__V_CMPX_T_I32
14199
14200 Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32()
14201 {
14202 } // ~Inst_VOPC__V_CMPX_T_I32
14203
14204 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
14205 void
14206 Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
14207 {
14208 Wavefront *wf = gpuDynInst->wavefront();
14209 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14210
14211 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14212 if (wf->execMask(lane)) {
14213 vcc.setBit(lane, 1);
14214 }
14215 }
14216
14217 wf->execMask() = vcc.rawData();
14218 vcc.write();
14219 }
14220
14221 Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt)
14222 : Inst_VOPC(iFmt, "v_cmpx_f_u32")
14223 {
14224 setFlag(ALU);
14225 } // Inst_VOPC__V_CMPX_F_U32
14226
14227 Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32()
14228 {
14229 } // ~Inst_VOPC__V_CMPX_F_U32
14230
14231 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
14232 void
14233 Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
14234 {
14235 Wavefront *wf = gpuDynInst->wavefront();
14236 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14237
14238 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14239 if (wf->execMask(lane)) {
14240 vcc.setBit(lane, 0);
14241 }
14242 }
14243
14244 wf->execMask() = vcc.rawData();
14245 vcc.write();
14246 }
14247
14248 Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt)
14249 : Inst_VOPC(iFmt, "v_cmpx_lt_u32")
14250 {
14251 setFlag(ALU);
14252 } // Inst_VOPC__V_CMPX_LT_U32
14253
14254 Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32()
14255 {
14256 } // ~Inst_VOPC__V_CMPX_LT_U32
14257
14258 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14259 void
14260 Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
14261 {
14262 Wavefront *wf = gpuDynInst->wavefront();
14263 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14264 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14265 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14266
14267 src0.readSrc();
14268 src1.read();
14269
14270 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14271 if (wf->execMask(lane)) {
14272 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14273 }
14274 }
14275
14276 wf->execMask() = vcc.rawData();
14277 vcc.write();
14278 }
14279
14280 Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt)
14281 : Inst_VOPC(iFmt, "v_cmpx_eq_u32")
14282 {
14283 setFlag(ALU);
14284 } // Inst_VOPC__V_CMPX_EQ_U32
14285
14286 Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32()
14287 {
14288 } // ~Inst_VOPC__V_CMPX_EQ_U32
14289
14290 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14291 void
14292 Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
14293 {
14294 Wavefront *wf = gpuDynInst->wavefront();
14295 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14296 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14297 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14298
14299 src0.readSrc();
14300 src1.read();
14301
14302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14303 if (wf->execMask(lane)) {
14304 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14305 }
14306 }
14307
14308 wf->execMask() = vcc.rawData();
14309 vcc.write();
14310 }
14311
14312 Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt)
14313 : Inst_VOPC(iFmt, "v_cmpx_le_u32")
14314 {
14315 setFlag(ALU);
14316 } // Inst_VOPC__V_CMPX_LE_U32
14317
14318 Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32()
14319 {
14320 } // ~Inst_VOPC__V_CMPX_LE_U32
14321
14322 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14323 void
14324 Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
14325 {
14326 Wavefront *wf = gpuDynInst->wavefront();
14327 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14328 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14329 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14330
14331 src0.readSrc();
14332 src1.read();
14333
14334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14335 if (wf->execMask(lane)) {
14336 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14337 }
14338 }
14339
14340 wf->execMask() = vcc.rawData();
14341 vcc.write();
14342 }
14343
14344 Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt)
14345 : Inst_VOPC(iFmt, "v_cmpx_gt_u32")
14346 {
14347 setFlag(ALU);
14348 } // Inst_VOPC__V_CMPX_GT_U32
14349
14350 Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32()
14351 {
14352 } // ~Inst_VOPC__V_CMPX_GT_U32
14353
14354 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14355 void
14356 Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
14357 {
14358 Wavefront *wf = gpuDynInst->wavefront();
14359 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14360 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14361 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14362
14363 src0.readSrc();
14364 src1.read();
14365
14366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14367 if (wf->execMask(lane)) {
14368 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14369 }
14370 }
14371
14372 wf->execMask() = vcc.rawData();
14373 vcc.write();
14374 }
14375
14376 Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt)
14377 : Inst_VOPC(iFmt, "v_cmpx_ne_u32")
14378 {
14379 setFlag(ALU);
14380 } // Inst_VOPC__V_CMPX_NE_U32
14381
14382 Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32()
14383 {
14384 } // ~Inst_VOPC__V_CMPX_NE_U32
14385
14386         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14387 void
14388 Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
14389 {
14390 Wavefront *wf = gpuDynInst->wavefront();
14391 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14392 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14394
14395 src0.readSrc();
14396 src1.read();
14397
14398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14399 if (wf->execMask(lane)) {
14400 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14401 }
14402 }
14403
14404 wf->execMask() = vcc.rawData();
14405 vcc.write();
14406 }
14407
14408 Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt)
14409 : Inst_VOPC(iFmt, "v_cmpx_ge_u32")
14410 {
14411 setFlag(ALU);
14412 } // Inst_VOPC__V_CMPX_GE_U32
14413
14414 Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32()
14415 {
14416 } // ~Inst_VOPC__V_CMPX_GE_U32
14417
14418 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14419 void
14420 Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
14421 {
14422 Wavefront *wf = gpuDynInst->wavefront();
14423 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14424 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14425 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14426
14427 src0.readSrc();
14428 src1.read();
14429
14430 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14431 if (wf->execMask(lane)) {
14432 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14433 }
14434 }
14435
14436 wf->execMask() = vcc.rawData();
14437 vcc.write();
14438 }
14439
14440 Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt)
14441 : Inst_VOPC(iFmt, "v_cmpx_t_u32")
14442 {
14443 setFlag(ALU);
14444 } // Inst_VOPC__V_CMPX_T_U32
14445
14446 Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32()
14447 {
14448 } // ~Inst_VOPC__V_CMPX_T_U32
14449
14450 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
14451 void
14452 Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
14453 {
14454 Wavefront *wf = gpuDynInst->wavefront();
14455 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14456
14457 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14458 if (wf->execMask(lane)) {
14459 vcc.setBit(lane, 1);
14460 }
14461 }
14462
14463 wf->execMask() = vcc.rawData();
14464 vcc.write();
14465 }
14466
14467 Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt)
14468 : Inst_VOPC(iFmt, "v_cmp_f_i64")
14469 {
14470 setFlag(ALU);
14471 } // Inst_VOPC__V_CMP_F_I64
14472
14473 Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64()
14474 {
14475 } // ~Inst_VOPC__V_CMP_F_I64
14476
14477 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
14478 void
14479 Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
14480 {
14481 Wavefront *wf = gpuDynInst->wavefront();
14482 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14483
14484 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14485 if (wf->execMask(lane)) {
14486 vcc.setBit(lane, 0);
14487 }
14488 }
14489
14490 vcc.write();
14491 }
14492
14493 Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt)
14494 : Inst_VOPC(iFmt, "v_cmp_lt_i64")
14495 {
14496 setFlag(ALU);
14497 } // Inst_VOPC__V_CMP_LT_I64
14498
14499 Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64()
14500 {
14501 } // ~Inst_VOPC__V_CMP_LT_I64
14502
14503 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14504 void
14505 Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
14506 {
14507 Wavefront *wf = gpuDynInst->wavefront();
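            // 64-bit vector sources occupy a pair of consecutive VGPRs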
14508 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14509 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14510 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14511
14512 src0.readSrc();
14513 src1.read();
14514
14515 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14516 if (wf->execMask(lane)) {
14517 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14518 }
14519 }
14520
14521 vcc.write();
14522 }
14523
14524 Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt)
14525 : Inst_VOPC(iFmt, "v_cmp_eq_i64")
14526 {
14527 setFlag(ALU);
14528 } // Inst_VOPC__V_CMP_EQ_I64
14529
14530 Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64()
14531 {
14532 } // ~Inst_VOPC__V_CMP_EQ_I64
14533
14534 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14535 void
14536 Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
14537 {
14538 Wavefront *wf = gpuDynInst->wavefront();
14539 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14540 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14541 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14542
14543 src0.readSrc();
14544 src1.read();
14545
14546 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14547 if (wf->execMask(lane)) {
14548 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14549 }
14550 }
14551
14552 vcc.write();
14553 }
14554
14555 Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt)
14556 : Inst_VOPC(iFmt, "v_cmp_le_i64")
14557 {
14558 setFlag(ALU);
14559 } // Inst_VOPC__V_CMP_LE_I64
14560
14561 Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64()
14562 {
14563 } // ~Inst_VOPC__V_CMP_LE_I64
14564
14565 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14566 void
14567 Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
14568 {
14569 Wavefront *wf = gpuDynInst->wavefront();
14570 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14571 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14572 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14573
14574 src0.readSrc();
14575 src1.read();
14576
14577 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14578 if (wf->execMask(lane)) {
14579 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14580 }
14581 }
14582
14583 vcc.write();
14584 }
14585
14586 Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt)
14587 : Inst_VOPC(iFmt, "v_cmp_gt_i64")
14588 {
14589 setFlag(ALU);
14590 } // Inst_VOPC__V_CMP_GT_I64
14591
14592 Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64()
14593 {
14594 } // ~Inst_VOPC__V_CMP_GT_I64
14595
14596 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14597 void
14598 Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
14599 {
14600 Wavefront *wf = gpuDynInst->wavefront();
14601 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14602 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14603 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14604
14605 src0.readSrc();
14606 src1.read();
14607
14608 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14609 if (wf->execMask(lane)) {
14610 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14611 }
14612 }
14613
14614 vcc.write();
14615 }
14616
14617 Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt)
14618 : Inst_VOPC(iFmt, "v_cmp_ne_i64")
14619 {
14620 setFlag(ALU);
14621 } // Inst_VOPC__V_CMP_NE_I64
14622
14623 Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64()
14624 {
14625 } // ~Inst_VOPC__V_CMP_NE_I64
14626
14627         // D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14628 void
14629 Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
14630 {
14631 Wavefront *wf = gpuDynInst->wavefront();
14632 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14633 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14634 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14635
14636 src0.readSrc();
14637 src1.read();
14638
14639 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14640 if (wf->execMask(lane)) {
14641 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14642 }
14643 }
14644
14645 vcc.write();
14646 }
14647
14648 Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt)
14649 : Inst_VOPC(iFmt, "v_cmp_ge_i64")
14650 {
14651 setFlag(ALU);
14652 } // Inst_VOPC__V_CMP_GE_I64
14653
14654 Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64()
14655 {
14656 } // ~Inst_VOPC__V_CMP_GE_I64
14657
14658 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14659 void
14660 Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
14661 {
14662 Wavefront *wf = gpuDynInst->wavefront();
14663 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14664 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14665 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14666
14667 src0.readSrc();
14668 src1.read();
14669
14670 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14671 if (wf->execMask(lane)) {
14672 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14673 }
14674 }
14675
14676 vcc.write();
14677 }
14678
14679 Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt)
14680 : Inst_VOPC(iFmt, "v_cmp_t_i64")
14681 {
14682 setFlag(ALU);
14683 } // Inst_VOPC__V_CMP_T_I64
14684
14685 Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64()
14686 {
14687 } // ~Inst_VOPC__V_CMP_T_I64
14688
14689 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
14690 void
14691 Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
14692 {
14693 Wavefront *wf = gpuDynInst->wavefront();
14694 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14695
14696 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14697 if (wf->execMask(lane)) {
14698 vcc.setBit(lane, 1);
14699 }
14700 }
14701
14702 vcc.write();
14703 }
14704
14705 Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt)
14706 : Inst_VOPC(iFmt, "v_cmp_f_u64")
14707 {
14708 setFlag(ALU);
14709 } // Inst_VOPC__V_CMP_F_U64
14710
14711 Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64()
14712 {
14713 } // ~Inst_VOPC__V_CMP_F_U64
14714
14715 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
14716 void
14717 Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
14718 {
14719 Wavefront *wf = gpuDynInst->wavefront();
14720 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14721
14722 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14723 if (wf->execMask(lane)) {
14724 vcc.setBit(lane, 0);
14725 }
14726 }
14727
14728 vcc.write();
14729 }
14730
14731 Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt)
14732 : Inst_VOPC(iFmt, "v_cmp_lt_u64")
14733 {
14734 setFlag(ALU);
14735 } // Inst_VOPC__V_CMP_LT_U64
14736
14737 Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64()
14738 {
14739 } // ~Inst_VOPC__V_CMP_LT_U64
14740
14741 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14742 void
14743 Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
14744 {
14745 Wavefront *wf = gpuDynInst->wavefront();
14746 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14747 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14748 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14749
14750 src0.readSrc();
14751 src1.read();
14752
14753 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14754 if (wf->execMask(lane)) {
14755 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14756 }
14757 }
14758
14759 vcc.write();
14760 }
14761
14762 Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt)
14763 : Inst_VOPC(iFmt, "v_cmp_eq_u64")
14764 {
14765 setFlag(ALU);
14766 } // Inst_VOPC__V_CMP_EQ_U64
14767
14768 Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64()
14769 {
14770 } // ~Inst_VOPC__V_CMP_EQ_U64
14771
14772 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14773 void
14774 Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
14775 {
14776 Wavefront *wf = gpuDynInst->wavefront();
14777 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14778 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14779 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14780
14781 src0.readSrc();
14782 src1.read();
14783
14784 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14785 if (wf->execMask(lane)) {
14786 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14787 }
14788 }
14789
14790 vcc.write();
14791 }
14792
14793 Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt)
14794 : Inst_VOPC(iFmt, "v_cmp_le_u64")
14795 {
14796 setFlag(ALU);
14797 } // Inst_VOPC__V_CMP_LE_U64
14798
14799 Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64()
14800 {
14801 } // ~Inst_VOPC__V_CMP_LE_U64
14802
14803 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14804 void
14805 Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
14806 {
14807 Wavefront *wf = gpuDynInst->wavefront();
14808 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14809 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14810 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14811
14812 src0.readSrc();
14813 src1.read();
14814
14815 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14816 if (wf->execMask(lane)) {
14817 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14818 }
14819 }
14820
14821 vcc.write();
14822 }
14823
14824 Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt)
14825 : Inst_VOPC(iFmt, "v_cmp_gt_u64")
14826 {
14827 setFlag(ALU);
14828 } // Inst_VOPC__V_CMP_GT_U64
14829
14830 Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64()
14831 {
14832 } // ~Inst_VOPC__V_CMP_GT_U64
14833
14834 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14835 void
14836 Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
14837 {
14838 Wavefront *wf = gpuDynInst->wavefront();
14839 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14840 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14841 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14842
14843 src0.readSrc();
14844 src1.read();
14845
14846 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14847 if (wf->execMask(lane)) {
14848 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14849 }
14850 }
14851
14852 vcc.write();
14853 }
14854
14855 Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt)
14856 : Inst_VOPC(iFmt, "v_cmp_ne_u64")
14857 {
14858 setFlag(ALU);
14859 } // Inst_VOPC__V_CMP_NE_U64
14860
14861 Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64()
14862 {
14863 } // ~Inst_VOPC__V_CMP_NE_U64
14864
14865         // D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14866 void
14867 Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
14868 {
14869 Wavefront *wf = gpuDynInst->wavefront();
14870 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14871 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14872 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14873
14874 src0.readSrc();
14875 src1.read();
14876
14877 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14878 if (wf->execMask(lane)) {
14879 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14880 }
14881 }
14882
14883 vcc.write();
14884 }
14885
14886 Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt)
14887 : Inst_VOPC(iFmt, "v_cmp_ge_u64")
14888 {
14889 setFlag(ALU);
14890 } // Inst_VOPC__V_CMP_GE_U64
14891
14892 Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64()
14893 {
14894 } // ~Inst_VOPC__V_CMP_GE_U64
14895
14896 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14897 void
14898 Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
14899 {
14900 Wavefront *wf = gpuDynInst->wavefront();
14901 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14902 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14903 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14904
14905 src0.readSrc();
14906 src1.read();
14907
14908 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14909 if (wf->execMask(lane)) {
14910 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14911 }
14912 }
14913
14914 vcc.write();
14915 }
14916
14917 Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt)
14918 : Inst_VOPC(iFmt, "v_cmp_t_u64")
14919 {
14920 setFlag(ALU);
14921 } // Inst_VOPC__V_CMP_T_U64
14922
14923 Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64()
14924 {
14925 } // ~Inst_VOPC__V_CMP_T_U64
14926
14927 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
14928 void
14929 Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
14930 {
14931 Wavefront *wf = gpuDynInst->wavefront();
14932 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14933
14934 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14935 if (wf->execMask(lane)) {
14936 vcc.setBit(lane, 1);
14937 }
14938 }
14939
14940 vcc.write();
14941 }
14942
14943 Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt)
14944 : Inst_VOPC(iFmt, "v_cmpx_f_i64")
14945 {
14946 setFlag(ALU);
14947 } // Inst_VOPC__V_CMPX_F_I64
14948
14949 Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64()
14950 {
14951 } // ~Inst_VOPC__V_CMPX_F_I64
14952
14953 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
14954 void
14955 Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
14956 {
14957 Wavefront *wf = gpuDynInst->wavefront();
14958 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14959
14960 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14961 if (wf->execMask(lane)) {
14962 vcc.setBit(lane, 0);
14963 }
14964 }
14965
14966 wf->execMask() = vcc.rawData();
14967 vcc.write();
14968 }
14969
14970 Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt)
14971 : Inst_VOPC(iFmt, "v_cmpx_lt_i64")
14972 {
14973 setFlag(ALU);
14974 } // Inst_VOPC__V_CMPX_LT_I64
14975
14976 Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64()
14977 {
14978 } // ~Inst_VOPC__V_CMPX_LT_I64
14979
14980 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14981 void
14982 Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
14983 {
14984 Wavefront *wf = gpuDynInst->wavefront();
14985 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14986 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14987 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14988
14989 src0.readSrc();
14990 src1.read();
14991
14992 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14993 if (wf->execMask(lane)) {
14994 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14995 }
14996 }
14997
14998 wf->execMask() = vcc.rawData();
14999 vcc.write();
15000 }
15001
15002 Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt)
15003 : Inst_VOPC(iFmt, "v_cmpx_eq_i64")
15004 {
15005 setFlag(ALU);
15006 } // Inst_VOPC__V_CMPX_EQ_I64
15007
15008 Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64()
15009 {
15010 } // ~Inst_VOPC__V_CMPX_EQ_I64
15011
15012 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
15013 void
15014 Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
15015 {
15016 Wavefront *wf = gpuDynInst->wavefront();
15017 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15018 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15019 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15020
15021 src0.readSrc();
15022 src1.read();
15023
15024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15025 if (wf->execMask(lane)) {
15026 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
15027 }
15028 }
15029
15030 wf->execMask() = vcc.rawData();
15031 vcc.write();
15032 }
15033
15034 Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt)
15035 : Inst_VOPC(iFmt, "v_cmpx_le_i64")
15036 {
15037 setFlag(ALU);
15038 } // Inst_VOPC__V_CMPX_LE_I64
15039
15040 Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64()
15041 {
15042 } // ~Inst_VOPC__V_CMPX_LE_I64
15043
15044 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
15045 void
15046 Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
15047 {
15048 Wavefront *wf = gpuDynInst->wavefront();
15049 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15050 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15051 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15052
15053 src0.readSrc();
15054 src1.read();
15055
15056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15057 if (wf->execMask(lane)) {
15058 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
15059 }
15060 }
15061
15062 wf->execMask() = vcc.rawData();
15063 vcc.write();
15064 }
15065
15066 Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt)
15067 : Inst_VOPC(iFmt, "v_cmpx_gt_i64")
15068 {
15069 setFlag(ALU);
15070 } // Inst_VOPC__V_CMPX_GT_I64
15071
15072 Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64()
15073 {
15074 } // ~Inst_VOPC__V_CMPX_GT_I64
15075
15076 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
15077 void
15078 Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
15079 {
15080 Wavefront *wf = gpuDynInst->wavefront();
15081 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15082 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15083 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15084
15085 src0.readSrc();
15086 src1.read();
15087
15088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15089 if (wf->execMask(lane)) {
15090 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
15091 }
15092 }
15093
15094 wf->execMask() = vcc.rawData();
15095 vcc.write();
15096 }
15097
15098 Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt)
15099 : Inst_VOPC(iFmt, "v_cmpx_ne_i64")
15100 {
15101 setFlag(ALU);
15102 } // Inst_VOPC__V_CMPX_NE_I64
15103
15104 Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64()
15105 {
15106 } // ~Inst_VOPC__V_CMPX_NE_I64
15107
15108         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
15109 void
15110 Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
15111 {
15112 Wavefront *wf = gpuDynInst->wavefront();
15113 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15114 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15115 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15116
15117 src0.readSrc();
15118 src1.read();
15119
15120 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15121 if (wf->execMask(lane)) {
15122 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
15123 }
15124 }
15125
15126 wf->execMask() = vcc.rawData();
15127 vcc.write();
15128 }
15129
15130 Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt)
15131 : Inst_VOPC(iFmt, "v_cmpx_ge_i64")
15132 {
15133 setFlag(ALU);
15134 } // Inst_VOPC__V_CMPX_GE_I64
15135
15136 Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64()
15137 {
15138 } // ~Inst_VOPC__V_CMPX_GE_I64
15139
15140 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
15141 void
15142 Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
15143 {
15144 Wavefront *wf = gpuDynInst->wavefront();
15145 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15146 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15147 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15148
15149 src0.readSrc();
15150 src1.read();
15151
15152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15153 if (wf->execMask(lane)) {
15154 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
15155 }
15156 }
15157
15158 wf->execMask() = vcc.rawData();
15159 vcc.write();
15160 }
15161
15162 Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt)
15163 : Inst_VOPC(iFmt, "v_cmpx_t_i64")
15164 {
15165 setFlag(ALU);
15166 } // Inst_VOPC__V_CMPX_T_I64
15167
15168 Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64()
15169 {
15170 } // ~Inst_VOPC__V_CMPX_T_I64
15171
15172 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
15173 void
15174 Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
15175 {
15176 Wavefront *wf = gpuDynInst->wavefront();
15177 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15178
15179 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15180 if (wf->execMask(lane)) {
15181 vcc.setBit(lane, 1);
15182 }
15183 }
15184
15185 wf->execMask() = vcc.rawData();
15186 vcc.write();
15187 }
15188
15189 Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt)
15190 : Inst_VOPC(iFmt, "v_cmpx_f_u64")
15191 {
15192 setFlag(ALU);
15193 } // Inst_VOPC__V_CMPX_F_U64
15194
15195 Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64()
15196 {
15197 } // ~Inst_VOPC__V_CMPX_F_U64
15198
15199 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
15200 void
15201 Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
15202 {
15203 Wavefront *wf = gpuDynInst->wavefront();
15204 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15205
15206 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15207 if (wf->execMask(lane)) {
15208 vcc.setBit(lane, 0);
15209 }
15210 }
15211
15212 wf->execMask() = vcc.rawData();
15213 vcc.write();
15214 }
15215
15216 Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt)
15217 : Inst_VOPC(iFmt, "v_cmpx_lt_u64")
15218 {
15219 setFlag(ALU);
15220 } // Inst_VOPC__V_CMPX_LT_U64
15221
15222 Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64()
15223 {
15224 } // ~Inst_VOPC__V_CMPX_LT_U64
15225
15226 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
15227 void
15228 Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
15229 {
15230 Wavefront *wf = gpuDynInst->wavefront();
15231 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15232 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15233 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15234
15235 src0.readSrc();
15236 src1.read();
15237
15238 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15239 if (wf->execMask(lane)) {
15240 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
15241 }
15242 }
15243
15244 wf->execMask() = vcc.rawData();
15245 vcc.write();
15246 }
15247
15248 Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt)
15249 : Inst_VOPC(iFmt, "v_cmpx_eq_u64")
15250 {
15251 setFlag(ALU);
15252 } // Inst_VOPC__V_CMPX_EQ_U64
15253
15254 Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64()
15255 {
15256 } // ~Inst_VOPC__V_CMPX_EQ_U64
15257
15258 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
15259 void
15260 Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
15261 {
15262 Wavefront *wf = gpuDynInst->wavefront();
15263 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15264 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15265 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15266
15267 src0.readSrc();
15268 src1.read();
15269
15270 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15271 if (wf->execMask(lane)) {
15272 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
15273 }
15274 }
15275
15276 wf->execMask() = vcc.rawData();
15277 vcc.write();
15278 }
15279
15280 Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt)
15281 : Inst_VOPC(iFmt, "v_cmpx_le_u64")
15282 {
15283 setFlag(ALU);
15284 } // Inst_VOPC__V_CMPX_LE_U64
15285
15286 Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64()
15287 {
15288 } // ~Inst_VOPC__V_CMPX_LE_U64
15289
15290 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
15291 void
15292 Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
15293 {
15294 Wavefront *wf = gpuDynInst->wavefront();
15295 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15296 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15297 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15298
15299 src0.readSrc();
15300 src1.read();
15301
15302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15303 if (wf->execMask(lane)) {
15304 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
15305 }
15306 }
15307
15308 wf->execMask() = vcc.rawData();
15309 vcc.write();
15310 }
15311
15312 Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt)
15313 : Inst_VOPC(iFmt, "v_cmpx_gt_u64")
15314 {
15315 setFlag(ALU);
15316 } // Inst_VOPC__V_CMPX_GT_U64
15317
15318 Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64()
15319 {
15320 } // ~Inst_VOPC__V_CMPX_GT_U64
15321
15322 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
15323 void
15324 Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
15325 {
15326 Wavefront *wf = gpuDynInst->wavefront();
15327 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15328 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15329 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15330
15331 src0.readSrc();
15332 src1.read();
15333
15334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15335 if (wf->execMask(lane)) {
15336 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
15337 }
15338 }
15339
15340 wf->execMask() = vcc.rawData();
15341 vcc.write();
15342 }
15343
15344 Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt)
15345 : Inst_VOPC(iFmt, "v_cmpx_ne_u64")
15346 {
15347 setFlag(ALU);
15348 } // Inst_VOPC__V_CMPX_NE_U64
15349
15350 Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64()
15351 {
15352 } // ~Inst_VOPC__V_CMPX_NE_U64
15353
15354         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
15355 void
15356 Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
15357 {
15358 Wavefront *wf = gpuDynInst->wavefront();
15359 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15360 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15361 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15362
15363 src0.readSrc();
15364 src1.read();
15365
15366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15367 if (wf->execMask(lane)) {
15368 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
15369 }
15370 }
15371
15372 wf->execMask() = vcc.rawData();
15373 vcc.write();
15374 }
15375
15376 Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt)
15377 : Inst_VOPC(iFmt, "v_cmpx_ge_u64")
15378 {
15379 setFlag(ALU);
15380 } // Inst_VOPC__V_CMPX_GE_U64
15381
15382 Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64()
15383 {
15384 } // ~Inst_VOPC__V_CMPX_GE_U64
15385
15386 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
15387 void
15388 Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
15389 {
15390 Wavefront *wf = gpuDynInst->wavefront();
15391 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15392 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15394
15395 src0.readSrc();
15396 src1.read();
15397
15398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15399 if (wf->execMask(lane)) {
15400 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
15401 }
15402 }
15403
15404 wf->execMask() = vcc.rawData();
15405 vcc.write();
15406 }
15407
15408 Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt)
15409 : Inst_VOPC(iFmt, "v_cmpx_t_u64")
15410 {
15411 setFlag(ALU);
15412 } // Inst_VOPC__V_CMPX_T_U64
15413
15414 Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64()
15415 {
15416 } // ~Inst_VOPC__V_CMPX_T_U64
15417
15418 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
15419 void
15420 Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
15421 {
15422 Wavefront *wf = gpuDynInst->wavefront();
15423 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15424
15425 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15426 if (wf->execMask(lane)) {
15427 vcc.setBit(lane, 1);
15428 }
15429 }
15430
15431 wf->execMask() = vcc.rawData();
15432 vcc.write();
15433 }
15434
15435 Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
15436 InFmt_VINTRP *iFmt)
15437 : Inst_VINTRP(iFmt, "v_interp_p1_f32")
15438 {
15439 setFlag(ALU);
15440 setFlag(F32);
15441 } // Inst_VINTRP__V_INTERP_P1_F32
15442
15443 Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
15444 {
15445 } // ~Inst_VINTRP__V_INTERP_P1_F32
15446
15447 // D.f = P10 * S.f + P0; parameter interpolation
15448 void
15449 Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
15450 {
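            // parameter interpolation is a graphics feature and is left
            // unimplemented in this model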
15451 panicUnimplemented();
15452 }
15453
15454 Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
15455 InFmt_VINTRP *iFmt)
15456 : Inst_VINTRP(iFmt, "v_interp_p2_f32")
15457 {
15458 setFlag(ALU);
15459 setFlag(F32);
15460 } // Inst_VINTRP__V_INTERP_P2_F32
15461
15462 Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
15463 {
15464 } // ~Inst_VINTRP__V_INTERP_P2_F32
15465
15466 // D.f = P20 * S.f + D.f; parameter interpolation
15467 void
15468 Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
15469 {
15470 panicUnimplemented();
15471 }
15472
15473 Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
15474 InFmt_VINTRP *iFmt)
15475 : Inst_VINTRP(iFmt, "v_interp_mov_f32")
15476 {
15477 setFlag(ALU);
15478 setFlag(F32);
15479 } // Inst_VINTRP__V_INTERP_MOV_F32
15480
15481 Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
15482 {
15483 } // ~Inst_VINTRP__V_INTERP_MOV_F32
15484
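        // D.f = {P10, P20, P0}[S.u]; parameter load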
15485 void
15486 Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
15487 {
15488 panicUnimplemented();
15489 }
15490
15491 Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32(
15492 InFmt_VOP3 *iFmt)
15493 : Inst_VOP3(iFmt, "v_cmp_class_f32", true)
15494 {
15495 setFlag(ALU);
15496 setFlag(F32);
15497 } // Inst_VOP3__V_CMP_CLASS_F32
15498
15499 Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32()
15500 {
15501 } // ~Inst_VOP3__V_CMP_CLASS_F32
15502
15503 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
15504 // The function reports true if the floating point value is any of the
15505 // numeric types selected in S1.u according to the following list:
15506 // S1.u[0] -- value is a signaling NaN.
15507 // S1.u[1] -- value is a quiet NaN.
15508 // S1.u[2] -- value is negative infinity.
15509 // S1.u[3] -- value is a negative normal value.
15510 // S1.u[4] -- value is a negative denormal value.
15511 // S1.u[5] -- value is negative zero.
15512 // S1.u[6] -- value is positive zero.
15513 // S1.u[7] -- value is a positive denormal value.
15514 // S1.u[8] -- value is a positive normal value.
15515 // S1.u[9] -- value is positive infinity.
15516 void
15517 Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
15518 {
15519 Wavefront *wf = gpuDynInst->wavefront();
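            // VOP3-encoded compares read their sources from extData and
            // write the result mask to the SGPR pair named by VDST rather
            // than implicitly targeting VCC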
15520 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
15521 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15522 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15523
15524 src0.readSrc();
15525 src1.readSrc();
15526
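            // the first class that matches sets the lane's result bit and
            // short-circuits the remaining tests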
15527 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15528 if (wf->execMask(lane)) {
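                    // bits 0 (sNaN) and 1 (qNaN) are tested together
                    // because std::isnan() does not distinguish them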
15529 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15530 // is NaN
15531 if (std::isnan(src0[lane])) {
15532 sdst.setBit(lane, 1);
15533 continue;
15534 }
15535 }
15536 if (bits(src1[lane], 2)) {
15537 // is -infinity
15538 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15539 sdst.setBit(lane, 1);
15540 continue;
15541 }
15542 }
15543 if (bits(src1[lane], 3)) {
15544 // is -normal
15545 if (std::isnormal(src0[lane])
15546 && std::signbit(src0[lane])) {
15547 sdst.setBit(lane, 1);
15548 continue;
15549 }
15550 }
15551 if (bits(src1[lane], 4)) {
15552 // is -denormal
15553 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15554 && std::signbit(src0[lane])) {
15555 sdst.setBit(lane, 1);
15556 continue;
15557 }
15558 }
15559 if (bits(src1[lane], 5)) {
15560 // is -zero
15561 if (std::fpclassify(src0[lane]) == FP_ZERO
15562 && std::signbit(src0[lane])) {
15563 sdst.setBit(lane, 1);
15564 continue;
15565 }
15566 }
15567 if (bits(src1[lane], 6)) {
15568 // is +zero
15569 if (std::fpclassify(src0[lane]) == FP_ZERO
15570 && !std::signbit(src0[lane])) {
15571 sdst.setBit(lane, 1);
15572 continue;
15573 }
15574 }
15575 if (bits(src1[lane], 7)) {
15576 // is +denormal
15577 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15578 && !std::signbit(src0[lane])) {
15579 sdst.setBit(lane, 1);
15580 continue;
15581 }
15582 }
15583 if (bits(src1[lane], 8)) {
15584 // is +normal
15585 if (std::isnormal(src0[lane])
15586 && !std::signbit(src0[lane])) {
15587 sdst.setBit(lane, 1);
15588 continue;
15589 }
15590 }
15591 if (bits(src1[lane], 9)) {
15592 // is +infinity
15593 if (std::isinf(src0[lane])
15594 && !std::signbit(src0[lane])) {
15595 sdst.setBit(lane, 1);
15596 continue;
15597 }
15598 }
15599 }
15600 }
15601
15602 sdst.write();
15603 }
15604
15605 Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32(
15606 InFmt_VOP3 *iFmt)
15607 : Inst_VOP3(iFmt, "v_cmpx_class_f32", true)
15608 {
15609 setFlag(ALU);
15610 setFlag(F32);
15611 } // Inst_VOP3__V_CMPX_CLASS_F32
15612
15613 Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32()
15614 {
15615 } // ~Inst_VOP3__V_CMPX_CLASS_F32
15616
15617 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
15618 // S0.f
15619 // The function reports true if the floating point value is any of the
15620 // numeric types selected in S1.u according to the following list:
15621 // S1.u[0] -- value is a signaling NaN.
15622 // S1.u[1] -- value is a quiet NaN.
15623 // S1.u[2] -- value is negative infinity.
15624 // S1.u[3] -- value is a negative normal value.
15625 // S1.u[4] -- value is a negative denormal value.
15626 // S1.u[5] -- value is negative zero.
15627 // S1.u[6] -- value is positive zero.
15628 // S1.u[7] -- value is a positive denormal value.
15629 // S1.u[8] -- value is a positive normal value.
15630 // S1.u[9] -- value is positive infinity.
15631 void
15632 Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
15633 {
15634 Wavefront *wf = gpuDynInst->wavefront();
15635 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
15636 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15637 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15638
15639 src0.readSrc();
15640 src1.readSrc();
15641
15642 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15643 if (wf->execMask(lane)) {
15644 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15645 // is NaN
15646 if (std::isnan(src0[lane])) {
15647 sdst.setBit(lane, 1);
15648 continue;
15649 }
15650 }
15651 if (bits(src1[lane], 2)) {
15652 // is -infinity
15653 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15654 sdst.setBit(lane, 1);
15655 continue;
15656 }
15657 }
15658 if (bits(src1[lane], 3)) {
15659 // is -normal
15660 if (std::isnormal(src0[lane])
15661 && std::signbit(src0[lane])) {
15662 sdst.setBit(lane, 1);
15663 continue;
15664 }
15665 }
15666 if (bits(src1[lane], 4)) {
15667 // is -denormal
15668 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15669 && std::signbit(src0[lane])) {
15670 sdst.setBit(lane, 1);
15671 continue;
15672 }
15673 }
15674 if (bits(src1[lane], 5)) {
15675 // is -zero
15676 if (std::fpclassify(src0[lane]) == FP_ZERO
15677 && std::signbit(src0[lane])) {
15678 sdst.setBit(lane, 1);
15679 continue;
15680 }
15681 }
15682 if (bits(src1[lane], 6)) {
15683 // is +zero
15684 if (std::fpclassify(src0[lane]) == FP_ZERO
15685 && !std::signbit(src0[lane])) {
15686 sdst.setBit(lane, 1);
15687 continue;
15688 }
15689 }
15690 if (bits(src1[lane], 7)) {
15691 // is +denormal
15692 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15693 && !std::signbit(src0[lane])) {
15694 sdst.setBit(lane, 1);
15695 continue;
15696 }
15697 }
15698 if (bits(src1[lane], 8)) {
15699 // is +normal
15700 if (std::isnormal(src0[lane])
15701 && !std::signbit(src0[lane])) {
15702 sdst.setBit(lane, 1);
15703 continue;
15704 }
15705 }
15706 if (bits(src1[lane], 9)) {
15707 // is +infinity
15708 if (std::isinf(src0[lane])
15709 && !std::signbit(src0[lane])) {
15710 sdst.setBit(lane, 1);
15711 continue;
15712 }
15713 }
15714 }
15715 }
15716
15717 wf->execMask() = sdst.rawData();
15718 sdst.write();
15719 }
15720
15721 Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64(
15722 InFmt_VOP3 *iFmt)
15723 : Inst_VOP3(iFmt, "v_cmp_class_f64", true)
15724 {
15725 setFlag(ALU);
15726 setFlag(F64);
15727 } // Inst_VOP3__V_CMP_CLASS_F64
15728
15729 Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64()
15730 {
15731 } // ~Inst_VOP3__V_CMP_CLASS_F64
15732
15733 // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
15734 // The function reports true if the floating point value is any of the
15735 // numeric types selected in S1.u according to the following list:
15736 // S1.u[0] -- value is a signaling NaN.
15737 // S1.u[1] -- value is a quiet NaN.
15738 // S1.u[2] -- value is negative infinity.
15739 // S1.u[3] -- value is a negative normal value.
15740 // S1.u[4] -- value is a negative denormal value.
15741 // S1.u[5] -- value is negative zero.
15742 // S1.u[6] -- value is positive zero.
15743 // S1.u[7] -- value is a positive denormal value.
15744 // S1.u[8] -- value is a positive normal value.
15745 // S1.u[9] -- value is positive infinity.
15746 void
15747 Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
15748 {
15749 Wavefront *wf = gpuDynInst->wavefront();
15750 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
15751 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15752 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15753
15754 src0.readSrc();
15755 src1.readSrc();
15756
15757 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15758 if (wf->execMask(lane)) {
15759 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15760 // is NaN
15761 if (std::isnan(src0[lane])) {
15762 sdst.setBit(lane, 1);
15763 continue;
15764 }
15765 }
15766 if (bits(src1[lane], 2)) {
15767 // is -infinity
15768 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15769 sdst.setBit(lane, 1);
15770 continue;
15771 }
15772 }
15773 if (bits(src1[lane], 3)) {
15774 // is -normal
15775 if (std::isnormal(src0[lane])
15776 && std::signbit(src0[lane])) {
15777 sdst.setBit(lane, 1);
15778 continue;
15779 }
15780 }
15781 if (bits(src1[lane], 4)) {
15782 // is -denormal
15783 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15784 && std::signbit(src0[lane])) {
15785 sdst.setBit(lane, 1);
15786 continue;
15787 }
15788 }
15789 if (bits(src1[lane], 5)) {
15790 // is -zero
15791 if (std::fpclassify(src0[lane]) == FP_ZERO
15792 && std::signbit(src0[lane])) {
15793 sdst.setBit(lane, 1);
15794 continue;
15795 }
15796 }
15797 if (bits(src1[lane], 6)) {
15798 // is +zero
15799 if (std::fpclassify(src0[lane]) == FP_ZERO
15800 && !std::signbit(src0[lane])) {
15801 sdst.setBit(lane, 1);
15802 continue;
15803 }
15804 }
15805 if (bits(src1[lane], 7)) {
15806 // is +denormal
15807 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15808 && !std::signbit(src0[lane])) {
15809 sdst.setBit(lane, 1);
15810 continue;
15811 }
15812 }
15813 if (bits(src1[lane], 8)) {
15814 // is +normal
15815 if (std::isnormal(src0[lane])
15816 && !std::signbit(src0[lane])) {
15817 sdst.setBit(lane, 1);
15818 continue;
15819 }
15820 }
15821 if (bits(src1[lane], 9)) {
15822 // is +infinity
15823 if (std::isinf(src0[lane])
15824 && !std::signbit(src0[lane])) {
15825 sdst.setBit(lane, 1);
15826 continue;
15827 }
15828 }
15829 }
15830 }
15831
15832 sdst.write();
15833 }
15834
15835 Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64(
15836 InFmt_VOP3 *iFmt)
15837 : Inst_VOP3(iFmt, "v_cmpx_class_f64", true)
15838 {
15839 setFlag(ALU);
15840 setFlag(F64);
15841 } // Inst_VOP3__V_CMPX_CLASS_F64
15842
15843 Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64()
15844 {
15845 } // ~Inst_VOP3__V_CMPX_CLASS_F64
15846
15847 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
15848 // S0.d
15849 // The function reports true if the floating point value is any of the
15850 // numeric types selected in S1.u according to the following list:
15851 // S1.u[0] -- value is a signaling NaN.
15852 // S1.u[1] -- value is a quiet NaN.
15853 // S1.u[2] -- value is negative infinity.
15854 // S1.u[3] -- value is a negative normal value.
15855 // S1.u[4] -- value is a negative denormal value.
15856 // S1.u[5] -- value is negative zero.
15857 // S1.u[6] -- value is positive zero.
15858 // S1.u[7] -- value is a positive denormal value.
15859 // S1.u[8] -- value is a positive normal value.
15860 // S1.u[9] -- value is positive infinity.
15861 void
15862 Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
15863 {
15864 Wavefront *wf = gpuDynInst->wavefront();
15865 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
15866 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15867 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15868
15869 src0.readSrc();
15870 src1.readSrc();
15871
15872 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15873 if (wf->execMask(lane)) {
15874 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15875 // is NaN
15876 if (std::isnan(src0[lane])) {
15877 sdst.setBit(lane, 1);
15878 continue;
15879 }
15880 }
15881 if (bits(src1[lane], 2)) {
15882 // is -infinity
15883 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15884 sdst.setBit(lane, 1);
15885 continue;
15886 }
15887 }
15888 if (bits(src1[lane], 3)) {
15889 // is -normal
15890 if (std::isnormal(src0[lane])
15891 && std::signbit(src0[lane])) {
15892 sdst.setBit(lane, 1);
15893 continue;
15894 }
15895 }
15896 if (bits(src1[lane], 4)) {
15897 // is -denormal
15898 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15899 && std::signbit(src0[lane])) {
15900 sdst.setBit(lane, 1);
15901 continue;
15902 }
15903 }
15904 if (bits(src1[lane], 5)) {
15905 // is -zero
15906 if (std::fpclassify(src0[lane]) == FP_ZERO
15907 && std::signbit(src0[lane])) {
15908 sdst.setBit(lane, 1);
15909 continue;
15910 }
15911 }
15912 if (bits(src1[lane], 6)) {
15913 // is +zero
15914 if (std::fpclassify(src0[lane]) == FP_ZERO
15915 && !std::signbit(src0[lane])) {
15916 sdst.setBit(lane, 1);
15917 continue;
15918 }
15919 }
15920 if (bits(src1[lane], 7)) {
15921 // is +denormal
15922 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15923 && !std::signbit(src0[lane])) {
15924 sdst.setBit(lane, 1);
15925 continue;
15926 }
15927 }
15928 if (bits(src1[lane], 8)) {
15929 // is +normal
15930 if (std::isnormal(src0[lane])
15931 && !std::signbit(src0[lane])) {
15932 sdst.setBit(lane, 1);
15933 continue;
15934 }
15935 }
15936 if (bits(src1[lane], 9)) {
15937 // is +infinity
15938 if (std::isinf(src0[lane])
15939 && !std::signbit(src0[lane])) {
15940 sdst.setBit(lane, 1);
15941 continue;
15942 }
15943 }
15944 }
15945 }
15946
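// CMPX variants additionally copy the result mask into EXEC, so
// subsequent instructions run only on lanes that passed the compare.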
15947 wf->execMask() = sdst.rawData();
15948 sdst.write();
15949 }
15950
15951 Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16(
15952 InFmt_VOP3 *iFmt)
15953 : Inst_VOP3(iFmt, "v_cmp_class_f16", true)
15954 {
15955 setFlag(ALU);
15956 setFlag(F16);
15957 } // Inst_VOP3__V_CMP_CLASS_F16
15958
15959 Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16()
15960 {
15961 } // ~Inst_VOP3__V_CMP_CLASS_F16
15962
15963 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
15964 // The function reports true if the floating point value is any of the
15965 // numeric types selected in S1.u according to the following list:
15966 // S1.u[0] -- value is a signaling NaN.
15967 // S1.u[1] -- value is a quiet NaN.
15968 // S1.u[2] -- value is negative infinity.
15969 // S1.u[3] -- value is a negative normal value.
15970 // S1.u[4] -- value is a negative denormal value.
15971 // S1.u[5] -- value is negative zero.
15972 // S1.u[6] -- value is positive zero.
15973 // S1.u[7] -- value is a positive denormal value.
15974 // S1.u[8] -- value is a positive normal value.
15975 // S1.u[9] -- value is positive infinity.
15976 void
15977 Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
15978 {
15979 panicUnimplemented();
15980 }
15981
15982 Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
15983 InFmt_VOP3 *iFmt)
15984 : Inst_VOP3(iFmt, "v_cmpx_class_f16", true)
15985 {
15986 setFlag(ALU);
15987 setFlag(F16);
15988 } // Inst_VOP3__V_CMPX_CLASS_F16
15989
15990 Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
15991 {
15992 } // ~Inst_VOP3__V_CMPX_CLASS_F16
15993
15994 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
15995 // S0.f16
15996 // The function reports true if the floating point value is any of the
15997 // numeric types selected in S1.u according to the following list:
15998 // S1.u[0] -- value is a signaling NaN.
15999 // S1.u[1] -- value is a quiet NaN.
16000 // S1.u[2] -- value is negative infinity.
16001 // S1.u[3] -- value is a negative normal value.
16002 // S1.u[4] -- value is a negative denormal value.
16003 // S1.u[5] -- value is negative zero.
16004 // S1.u[6] -- value is positive zero.
16005 // S1.u[7] -- value is a positive denormal value.
16006 // S1.u[8] -- value is a positive normal value.
16007 // S1.u[9] -- value is positive infinity.
16008 void
16009 Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
16010 {
16011 panicUnimplemented();
16012 }
16013
16014 Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3 *iFmt)
16015 : Inst_VOP3(iFmt, "v_cmp_f_f16", true)
16016 {
16017 setFlag(ALU);
16018 setFlag(F16);
16019 } // Inst_VOP3__V_CMP_F_F16
16020
16021 Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
16022 {
16023 } // ~Inst_VOP3__V_CMP_F_F16
16024
16025 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
16026 void
16027 Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
16028 {
16029 panicUnimplemented();
16030 }
16031
16032 Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
16033 InFmt_VOP3 *iFmt)
16034 : Inst_VOP3(iFmt, "v_cmp_lt_f16", true)
16035 {
16036 setFlag(ALU);
16037 setFlag(F16);
16038 } // Inst_VOP3__V_CMP_LT_F16
16039
16040 Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
16041 {
16042 } // ~Inst_VOP3__V_CMP_LT_F16
16043
16044 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
16045 void
16046 Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
16047 {
16048 panicUnimplemented();
16049 }
16050
16051 Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
16052 InFmt_VOP3 *iFmt)
16053 : Inst_VOP3(iFmt, "v_cmp_eq_f16", true)
16054 {
16055 setFlag(ALU);
16056 setFlag(F16);
16057 } // Inst_VOP3__V_CMP_EQ_F16
16058
16059 Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
16060 {
16061 } // ~Inst_VOP3__V_CMP_EQ_F16
16062
16063 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
16064 void
16065 Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
16066 {
16067 panicUnimplemented();
16068 }
16069
16070 Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
16071 InFmt_VOP3 *iFmt)
16072 : Inst_VOP3(iFmt, "v_cmp_le_f16", true)
16073 {
16074 setFlag(ALU);
16075 setFlag(F16);
16076 } // Inst_VOP3__V_CMP_LE_F16
16077
16078 Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
16079 {
16080 } // ~Inst_VOP3__V_CMP_LE_F16
16081
16082 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
16083 void
16084 Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
16085 {
16086 panicUnimplemented();
16087 }
16088
16089 Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
16090 InFmt_VOP3 *iFmt)
16091 : Inst_VOP3(iFmt, "v_cmp_gt_f16", true)
16092 {
16093 setFlag(ALU);
16094 setFlag(F16);
16095 } // Inst_VOP3__V_CMP_GT_F16
16096
16097 Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
16098 {
16099 } // ~Inst_VOP3__V_CMP_GT_F16
16100
16101 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
16102 void
16103 Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
16104 {
16105 panicUnimplemented();
16106 }
16107
16108 Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
16109 InFmt_VOP3 *iFmt)
16110 : Inst_VOP3(iFmt, "v_cmp_lg_f16", true)
16111 {
16112 setFlag(ALU);
16113 setFlag(F16);
16114 } // Inst_VOP3__V_CMP_LG_F16
16115
16116 Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
16117 {
16118 } // ~Inst_VOP3__V_CMP_LG_F16
16119
16120 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
16121 void
16122 Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
16123 {
16124 panicUnimplemented();
16125 }
16126
16127 Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
16128 InFmt_VOP3 *iFmt)
16129 : Inst_VOP3(iFmt, "v_cmp_ge_f16", true)
16130 {
16131 setFlag(ALU);
16132 setFlag(F16);
16133 } // Inst_VOP3__V_CMP_GE_F16
16134
16135 Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
16136 {
16137 } // ~Inst_VOP3__V_CMP_GE_F16
16138
16139 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
16140 void
16141 Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
16142 {
16143 panicUnimplemented();
16144 }
16145
16146 Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3 *iFmt)
16147 : Inst_VOP3(iFmt, "v_cmp_o_f16", true)
16148 {
16149 setFlag(ALU);
16150 setFlag(F16);
16151 } // Inst_VOP3__V_CMP_O_F16
16152
16153 Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16()
16154 {
16155 } // ~Inst_VOP3__V_CMP_O_F16
16156
16157 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
16158 void
16159 Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
16160 {
16161 panicUnimplemented();
16162 }
16163
16164 Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3 *iFmt)
16165 : Inst_VOP3(iFmt, "v_cmp_u_f16", true)
16166 {
16167 setFlag(ALU);
16168 setFlag(F16);
16169 } // Inst_VOP3__V_CMP_U_F16
16170
16171 Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16()
16172 {
16173 } // ~Inst_VOP3__V_CMP_U_F16
16174
16175 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
16176 void
16177 Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
16178 {
16179 panicUnimplemented();
16180 }
16181
16182 Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16(
16183 InFmt_VOP3 *iFmt)
16184 : Inst_VOP3(iFmt, "v_cmp_nge_f16", true)
16185 {
16186 setFlag(ALU);
16187 setFlag(F16);
16188 } // Inst_VOP3__V_CMP_NGE_F16
16189
16190 Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16()
16191 {
16192 } // ~Inst_VOP3__V_CMP_NGE_F16
16193
16194 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
16195 void
16196 Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
16197 {
16198 panicUnimplemented();
16199 }
16200
16201 Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16(
16202 InFmt_VOP3 *iFmt)
16203 : Inst_VOP3(iFmt, "v_cmp_nlg_f16", true)
16204 {
16205 setFlag(ALU);
16206 setFlag(F16);
16207 } // Inst_VOP3__V_CMP_NLG_F16
16208
16209 Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16()
16210 {
16211 } // ~Inst_VOP3__V_CMP_NLG_F16
16212
16213 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
16214 void
16215 Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
16216 {
16217 panicUnimplemented();
16218 }
16219
16220 Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16(
16221 InFmt_VOP3 *iFmt)
16222 : Inst_VOP3(iFmt, "v_cmp_ngt_f16", true)
16223 {
16224 setFlag(ALU);
16225 setFlag(F16);
16226 } // Inst_VOP3__V_CMP_NGT_F16
16227
16228 Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16()
16229 {
16230 } // ~Inst_VOP3__V_CMP_NGT_F16
16231
16232 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
16233 void
16234 Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
16235 {
16236 panicUnimplemented();
16237 }
16238
16239 Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16(
16240 InFmt_VOP3 *iFmt)
16241 : Inst_VOP3(iFmt, "v_cmp_nle_f16", true)
16242 {
16243 setFlag(ALU);
16244 setFlag(F16);
16245 } // Inst_VOP3__V_CMP_NLE_F16
16246
16247 Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16()
16248 {
16249 } // ~Inst_VOP3__V_CMP_NLE_F16
16250
16251 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
16252 void
16253 Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
16254 {
16255 panicUnimplemented();
16256 }
16257
16258 Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16(
16259 InFmt_VOP3 *iFmt)
16260 : Inst_VOP3(iFmt, "v_cmp_neq_f16", true)
16261 {
16262 setFlag(ALU);
16263 setFlag(F16);
16264 } // Inst_VOP3__V_CMP_NEQ_F16
16265
16266 Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16()
16267 {
16268 } // ~Inst_VOP3__V_CMP_NEQ_F16
16269
16270 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
16271 void
16272 Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
16273 {
16274 panicUnimplemented();
16275 }
16276
16277 Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16(
16278 InFmt_VOP3 *iFmt)
16279 : Inst_VOP3(iFmt, "v_cmp_nlt_f16", true)
16280 {
16281 setFlag(ALU);
16282 setFlag(F16);
16283 } // Inst_VOP3__V_CMP_NLT_F16
16284
16285 Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16()
16286 {
16287 } // ~Inst_VOP3__V_CMP_NLT_F16
16288
16289 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
16290 void
16291 Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
16292 {
16293 panicUnimplemented();
16294 }
16295
16296 Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16(
16297 InFmt_VOP3 *iFmt)
16298 : Inst_VOP3(iFmt, "v_cmp_tru_f16", true)
16299 {
16300 setFlag(ALU);
16301 setFlag(F16);
16302 } // Inst_VOP3__V_CMP_TRU_F16
16303
16304 Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16()
16305 {
16306 } // ~Inst_VOP3__V_CMP_TRU_F16
16307
16308 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
16309 void
16310 Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
16311 {
16312 Wavefront *wf = gpuDynInst->wavefront();
16313 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16314
16315 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16316 if (wf->execMask(lane)) {
16317 sdst.setBit(lane, 1);
16318 }
16319 }
16320
16321 sdst.write();
16322 }
16323
16324 Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16(
16325 InFmt_VOP3 *iFmt)
16326 : Inst_VOP3(iFmt, "v_cmpx_f_f16", true)
16327 {
16328 setFlag(ALU);
setFlag(F16);
16329 } // Inst_VOP3__V_CMPX_F_F16
16330
16331 Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16()
16332 {
16333 } // ~Inst_VOP3__V_CMPX_F_F16
16334
16335 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
16336 void
16337 Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
16338 {
16339 Wavefront *wf = gpuDynInst->wavefront();
16340 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16341
16342 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16343 if (wf->execMask(lane)) {
16344 sdst.setBit(lane, 0);
16345 }
16346 }
16347
16348 wf->execMask() = sdst.rawData();
16349 sdst.write();
16350 }
16351
16352 Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16(
16353 InFmt_VOP3 *iFmt)
16354 : Inst_VOP3(iFmt, "v_cmpx_lt_f16", true)
16355 {
16356 setFlag(ALU);
16357 setFlag(F16);
16358 } // Inst_VOP3__V_CMPX_LT_F16
16359
16360 Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16()
16361 {
16362 } // ~Inst_VOP3__V_CMPX_LT_F16
16363
16364 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
16365 void
16366 Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
16367 {
16368 panicUnimplemented();
16369 }
16370
16371 Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16(
16372 InFmt_VOP3 *iFmt)
16373 : Inst_VOP3(iFmt, "v_cmpx_eq_f16", true)
16374 {
16375 setFlag(ALU);
16376 setFlag(F16);
16377 } // Inst_VOP3__V_CMPX_EQ_F16
16378
16379 Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16()
16380 {
16381 } // ~Inst_VOP3__V_CMPX_EQ_F16
16382
16383 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
16384 void
16385 Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
16386 {
16387 panicUnimplemented();
16388 }
16389
16390 Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16(
16391 InFmt_VOP3 *iFmt)
16392 : Inst_VOP3(iFmt, "v_cmpx_le_f16", true)
16393 {
16394 setFlag(ALU);
16395 setFlag(F16);
16396 } // Inst_VOP3__V_CMPX_LE_F16
16397
16398 Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16()
16399 {
16400 } // ~Inst_VOP3__V_CMPX_LE_F16
16401
16402 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
16403 void
16404 Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
16405 {
16406 panicUnimplemented();
16407 }
16408
16409 Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16(
16410 InFmt_VOP3 *iFmt)
16411 : Inst_VOP3(iFmt, "v_cmpx_gt_f16", true)
16412 {
16413 setFlag(ALU);
16414 setFlag(F16);
16415 } // Inst_VOP3__V_CMPX_GT_F16
16416
16417 Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16()
16418 {
16419 } // ~Inst_VOP3__V_CMPX_GT_F16
16420
16421 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
16422 void
16423 Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
16424 {
16425 panicUnimplemented();
16426 }
16427
16428 Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16(
16429 InFmt_VOP3 *iFmt)
16430 : Inst_VOP3(iFmt, "v_cmpx_lg_f16", true)
16431 {
16432 setFlag(ALU);
16433 setFlag(F16);
16434 } // Inst_VOP3__V_CMPX_LG_F16
16435
16436 Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16()
16437 {
16438 } // ~Inst_VOP3__V_CMPX_LG_F16
16439
16440 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
16441 void
16442 Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
16443 {
16444 panicUnimplemented();
16445 }
16446
16447 Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16(
16448 InFmt_VOP3 *iFmt)
16449 : Inst_VOP3(iFmt, "v_cmpx_ge_f16", true)
16450 {
16451 setFlag(ALU);
16452 setFlag(F16);
16453 } // Inst_VOP3__V_CMPX_GE_F16
16454
16455 Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16()
16456 {
16457 } // ~Inst_VOP3__V_CMPX_GE_F16
16458
16459 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
16460 void
16461 Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
16462 {
16463 panicUnimplemented();
16464 }
16465
16466 Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16(
16467 InFmt_VOP3 *iFmt)
16468 : Inst_VOP3(iFmt, "v_cmpx_o_f16", true)
16469 {
16470 setFlag(ALU);
16471 setFlag(F16);
16472 } // Inst_VOP3__V_CMPX_O_F16
16473
16474 Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16()
16475 {
16476 } // ~Inst_VOP3__V_CMPX_O_F16
16477
16478 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
16479 // encoding.
16480 void
16481 Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
16482 {
16483 panicUnimplemented();
16484 }
16485
16486 Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16(
16487 InFmt_VOP3 *iFmt)
16488 : Inst_VOP3(iFmt, "v_cmpx_u_f16", true)
16489 {
16490 setFlag(ALU);
16491 setFlag(F16);
16492 } // Inst_VOP3__V_CMPX_U_F16
16493
16494 Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16()
16495 {
16496 } // ~Inst_VOP3__V_CMPX_U_F16
16497
16498 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
16499 // encoding.
16500 void
16501 Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
16502 {
16503 panicUnimplemented();
16504 }
16505
16506 Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16(
16507 InFmt_VOP3 *iFmt)
16508 : Inst_VOP3(iFmt, "v_cmpx_nge_f16", true)
16509 {
16510 setFlag(ALU);
16511 setFlag(F16);
16512 } // Inst_VOP3__V_CMPX_NGE_F16
16513
16514 Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16()
16515 {
16516 } // ~Inst_VOP3__V_CMPX_NGE_F16
16517
16518 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
16519 void
16520 Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
16521 {
16522 panicUnimplemented();
16523 }
16524
16525 Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16(
16526 InFmt_VOP3 *iFmt)
16527 : Inst_VOP3(iFmt, "v_cmpx_nlg_f16", true)
16528 {
16529 setFlag(ALU);
16530 setFlag(F16);
16531 } // Inst_VOP3__V_CMPX_NLG_F16
16532
16533 Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16()
16534 {
16535 } // ~Inst_VOP3__V_CMPX_NLG_F16
16536
16537 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
16538 void
16539 Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
16540 {
16541 panicUnimplemented();
16542 }
16543
16544 Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16(
16545 InFmt_VOP3 *iFmt)
16546 : Inst_VOP3(iFmt, "v_cmpx_ngt_f16", true)
16547 {
16548 setFlag(ALU);
16549 setFlag(F16);
16550 } // Inst_VOP3__V_CMPX_NGT_F16
16551
16552 Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16()
16553 {
16554 } // ~Inst_VOP3__V_CMPX_NGT_F16
16555
16556 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
16557 void
16558 Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
16559 {
16560 panicUnimplemented();
16561 }
16562
16563 Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16(
16564 InFmt_VOP3 *iFmt)
16565 : Inst_VOP3(iFmt, "v_cmpx_nle_f16", true)
16566 {
16567 setFlag(ALU);
16568 setFlag(F16);
16569 } // Inst_VOP3__V_CMPX_NLE_F16
16570
16571 Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16()
16572 {
16573 } // ~Inst_VOP3__V_CMPX_NLE_F16
16574
16575 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
16576 void
16577 Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
16578 {
16579 panicUnimplemented();
16580 }
16581
16582 Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16(
16583 InFmt_VOP3 *iFmt)
16584 : Inst_VOP3(iFmt, "v_cmpx_neq_f16", true)
16585 {
16586 setFlag(ALU);
16587 setFlag(F16);
16588 } // Inst_VOP3__V_CMPX_NEQ_F16
16589
16590 Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16()
16591 {
16592 } // ~Inst_VOP3__V_CMPX_NEQ_F16
16593
16594 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
16595 void
16596 Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
16597 {
16598 panicUnimplemented();
16599 }
16600
16601 Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16(
16602 InFmt_VOP3 *iFmt)
16603 : Inst_VOP3(iFmt, "v_cmpx_nlt_f16", true)
16604 {
16605 setFlag(ALU);
16606 setFlag(F16);
16607 } // Inst_VOP3__V_CMPX_NLT_F16
16608
16609 Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16()
16610 {
16611 } // ~Inst_VOP3__V_CMPX_NLT_F16
16612
16613 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
16614 void
16615 Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
16616 {
16617 panicUnimplemented();
16618 }
16619
16620 Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16(
16621 InFmt_VOP3 *iFmt)
16622 : Inst_VOP3(iFmt, "v_cmpx_tru_f16", true)
16623 {
16624 setFlag(ALU);
16625 setFlag(F16);
16626 } // Inst_VOP3__V_CMPX_TRU_F16
16627
16628 Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16()
16629 {
16630 } // ~Inst_VOP3__V_CMPX_TRU_F16
16631
16632 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
16633 void
16634 Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
16635 {
16636 Wavefront *wf = gpuDynInst->wavefront();
16637 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16638
16639 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16640 if (wf->execMask(lane)) {
16641 sdst.setBit(lane, 1);
16642 }
16643 }
16644
16645 wf->execMask() = sdst.rawData();
16646 sdst.write();
16647 }
16648
16649 Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3 *iFmt)
16650 : Inst_VOP3(iFmt, "v_cmp_f_f32", true)
16651 {
16652 setFlag(ALU);
16653 setFlag(F32);
16654 } // Inst_VOP3__V_CMP_F_F32
16655
16656 Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32()
16657 {
16658 } // ~Inst_VOP3__V_CMP_F_F32
16659
16660 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
16661 void
16662 Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
16663 {
16664 Wavefront *wf = gpuDynInst->wavefront();
16665 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16666
16667 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16668 if (wf->execMask(lane)) {
16669 sdst.setBit(lane, 0);
16670 }
16671 }
16672
16673 sdst.write();
16674 }
16675
16676 Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32(
16677 InFmt_VOP3 *iFmt)
16678 : Inst_VOP3(iFmt, "v_cmp_lt_f32", true)
16679 {
16680 setFlag(ALU);
16681 setFlag(F32);
16682 } // Inst_VOP3__V_CMP_LT_F32
16683
16684 Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32()
16685 {
16686 } // ~Inst_VOP3__V_CMP_LT_F32
16687
16688 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
16689 void
16690 Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
16691 {
16692 Wavefront *wf = gpuDynInst->wavefront();
16693 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16694 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16695 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16696
16697 src0.readSrc();
16698 src1.readSrc();
16699
16700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16701 if (wf->execMask(lane)) {
16702 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
16703 }
16704 }
16705
16706 sdst.write();
16707 }
16708
16709 Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32(
16710 InFmt_VOP3 *iFmt)
16711 : Inst_VOP3(iFmt, "v_cmp_eq_f32", true)
16712 {
16713 setFlag(ALU);
16714 setFlag(F32);
16715 } // Inst_VOP3__V_CMP_EQ_F32
16716
16717 Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32()
16718 {
16719 } // ~Inst_VOP3__V_CMP_EQ_F32
16720
16721 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
16722 void
16723 Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
16724 {
16725 Wavefront *wf = gpuDynInst->wavefront();
16726 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16727 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16728 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16729
16730 src0.readSrc();
16731 src1.readSrc();
16732
16733 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16734 if (wf->execMask(lane)) {
16735 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
16736 }
16737 }
16738
16739 sdst.write();
16740 }
16741
16742 Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32(
16743 InFmt_VOP3 *iFmt)
16744 : Inst_VOP3(iFmt, "v_cmp_le_f32", true)
16745 {
16746 setFlag(ALU);
16747 setFlag(F32);
16748 } // Inst_VOP3__V_CMP_LE_F32
16749
16750 Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32()
16751 {
16752 } // ~Inst_VOP3__V_CMP_LE_F32
16753
16754 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
16755 void
16756 Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
16757 {
16758 Wavefront *wf = gpuDynInst->wavefront();
16759 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16760 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16761 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16762
16763 src0.readSrc();
16764 src1.readSrc();
16765
16766 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16767 if (wf->execMask(lane)) {
16768 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
16769 }
16770 }
16771
16772 sdst.write();
16773 }
16774
16775 Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32(
16776 InFmt_VOP3 *iFmt)
16777 : Inst_VOP3(iFmt, "v_cmp_gt_f32", true)
16778 {
16779 setFlag(ALU);
16780 setFlag(F32);
16781 } // Inst_VOP3__V_CMP_GT_F32
16782
16783 Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32()
16784 {
16785 } // ~Inst_VOP3__V_CMP_GT_F32
16786
16787 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
16788 void
16789 Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
16790 {
16791 Wavefront *wf = gpuDynInst->wavefront();
16792 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16793 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16794 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16795
16796 src0.readSrc();
16797 src1.readSrc();
16798
16799 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16800 if (wf->execMask(lane)) {
16801 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
16802 }
16803 }
16804
16805 sdst.write();
16806 }
16807
16808 Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32(
16809 InFmt_VOP3 *iFmt)
16810 : Inst_VOP3(iFmt, "v_cmp_lg_f32", true)
16811 {
16812 setFlag(ALU);
16813 setFlag(F32);
16814 } // Inst_VOP3__V_CMP_LG_F32
16815
16816 Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32()
16817 {
16818 } // ~Inst_VOP3__V_CMP_LG_F32
16819
16820 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
16821 void
16822 Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
16823 {
16824 Wavefront *wf = gpuDynInst->wavefront();
16825 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16826 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16827 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16828
16829 src0.readSrc();
16830 src1.readSrc();
16831
16832 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16833 if (wf->execMask(lane)) {
16834 // LG ("<>") is an ordered compare: unlike operator!=, it is
// false when either operand is NaN, matching V_CMPX_LG_F32
// and V_CMP_LG_F64 below.
sdst.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
16835 }
16836 }
16837
16838 sdst.write();
16839 }
16840
16841 Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32(
16842 InFmt_VOP3 *iFmt)
16843 : Inst_VOP3(iFmt, "v_cmp_ge_f32", true)
16844 {
16845 setFlag(ALU);
16846 setFlag(F32);
16847 } // Inst_VOP3__V_CMP_GE_F32
16848
16849 Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32()
16850 {
16851 } // ~Inst_VOP3__V_CMP_GE_F32
16852
16853 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
16854 void
16855 Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
16856 {
16857 Wavefront *wf = gpuDynInst->wavefront();
16858 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16859 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16860 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16861
16862 src0.readSrc();
16863 src1.readSrc();
16864
16865 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16866 if (wf->execMask(lane)) {
16867 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
16868 }
16869 }
16870
16871 sdst.write();
16872 }
16873
16874 Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3 *iFmt)
16875 : Inst_VOP3(iFmt, "v_cmp_o_f32", true)
16876 {
16877 setFlag(ALU);
16878 setFlag(F32);
16879 } // Inst_VOP3__V_CMP_O_F32
16880
16881 Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32()
16882 {
16883 } // ~Inst_VOP3__V_CMP_O_F32
16884
16885 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
16886 void
16887 Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
16888 {
16889 Wavefront *wf = gpuDynInst->wavefront();
16890 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16891 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16892 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16893
16894 src0.readSrc();
16895 src1.readSrc();
16896
16897 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16898 if (wf->execMask(lane)) {
16899 sdst.setBit(lane, (!std::isnan(src0[lane])
16900 && !std::isnan(src1[lane])) ? 1 : 0);
16901 }
16902 }
16903
16904 sdst.write();
16905 }
16906
16907 Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3 *iFmt)
16908 : Inst_VOP3(iFmt, "v_cmp_u_f32", true)
16909 {
16910 setFlag(ALU);
16911 setFlag(F32);
16912 } // Inst_VOP3__V_CMP_U_F32
16913
16914 Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32()
16915 {
16916 } // ~Inst_VOP3__V_CMP_U_F32
16917
16918 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
16919 void
16920 Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
16921 {
16922 Wavefront *wf = gpuDynInst->wavefront();
16923 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16924 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16925 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16926
16927 src0.readSrc();
16928 src1.readSrc();
16929
16930 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16931 if (wf->execMask(lane)) {
16932 sdst.setBit(lane, (std::isnan(src0[lane])
16933 || std::isnan(src1[lane])) ? 1 : 0);
16934 }
16935 }
16936
16937 sdst.write();
16938 }
16939
16940 Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32(
16941 InFmt_VOP3 *iFmt)
16942 : Inst_VOP3(iFmt, "v_cmp_nge_f32", true)
16943 {
16944 setFlag(ALU);
16945 setFlag(F32);
16946 } // Inst_VOP3__V_CMP_NGE_F32
16947
16948 Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32()
16949 {
16950 } // ~Inst_VOP3__V_CMP_NGE_F32
16951
16952 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
16953 void
16954 Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
16955 {
16956 Wavefront *wf = gpuDynInst->wavefront();
16957 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16958 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16959 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16960
16961 src0.readSrc();
16962 src1.readSrc();
16963
16964 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16965 if (wf->execMask(lane)) {
16966 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
16967 }
16968 }
16969
16970 sdst.write();
16971 }
16972
16973 Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32(
16974 InFmt_VOP3 *iFmt)
16975 : Inst_VOP3(iFmt, "v_cmp_nlg_f32", true)
16976 {
16977 setFlag(ALU);
16978 setFlag(F32);
16979 } // Inst_VOP3__V_CMP_NLG_F32
16980
16981 Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32()
16982 {
16983 } // ~Inst_VOP3__V_CMP_NLG_F32
16984
16985 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
16986 void
16987 Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
16988 {
16989 Wavefront *wf = gpuDynInst->wavefront();
16990 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16991 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16992 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16993
16994 src0.readSrc();
16995 src1.readSrc();
16996
16997 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16998 if (wf->execMask(lane)) {
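// NLG inverts the ordered LG compare, so it is also true
// when the operands are unordered (either one is NaN).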
16999 sdst.setBit(lane, !(src0[lane] < src1[lane]
17000 || src0[lane] > src1[lane]) ? 1 : 0);
17001 }
17002 }
17003
17004 sdst.write();
17005 }
17006
17007 Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32(
17008 InFmt_VOP3 *iFmt)
17009 : Inst_VOP3(iFmt, "v_cmp_ngt_f32", true)
17010 {
17011 setFlag(ALU);
17012 setFlag(F32);
17013 } // Inst_VOP3__V_CMP_NGT_F32
17014
17015 Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32()
17016 {
17017 } // ~Inst_VOP3__V_CMP_NGT_F32
17018
17019 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
17020 void
17021 Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
17022 {
17023 Wavefront *wf = gpuDynInst->wavefront();
17024 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17025 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17026 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17027
17028 src0.readSrc();
17029 src1.readSrc();
17030
17031 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17032 if (wf->execMask(lane)) {
17033 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
17034 }
17035 }
17036
17037 sdst.write();
17038 }
17039
17040 Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32(
17041 InFmt_VOP3 *iFmt)
17042 : Inst_VOP3(iFmt, "v_cmp_nle_f32", true)
17043 {
17044 setFlag(ALU);
17045 setFlag(F32);
17046 } // Inst_VOP3__V_CMP_NLE_F32
17047
17048 Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32()
17049 {
17050 } // ~Inst_VOP3__V_CMP_NLE_F32
17051
17052 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
17053 void
17054 Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
17055 {
17056 Wavefront *wf = gpuDynInst->wavefront();
17057 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17058 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17059 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17060
17061 src0.readSrc();
17062 src1.readSrc();
17063
17064 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17065 if (wf->execMask(lane)) {
17066 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
17067 }
17068 }
17069
17070 sdst.write();
17071 }
17072
17073 Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32(
17074 InFmt_VOP3 *iFmt)
17075 : Inst_VOP3(iFmt, "v_cmp_neq_f32", true)
17076 {
17077 setFlag(ALU);
17078 setFlag(F32);
17079 } // Inst_VOP3__V_CMP_NEQ_F32
17080
17081 Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32()
17082 {
17083 } // ~Inst_VOP3__V_CMP_NEQ_F32
17084
17085 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
17086 void
17087 Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
17088 {
17089 Wavefront *wf = gpuDynInst->wavefront();
17090 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17091 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17092 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17093
17094 src0.readSrc();
17095 src1.readSrc();
17096
17097 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17098 if (wf->execMask(lane)) {
17099 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
17100 }
17101 }
17102
17103 sdst.write();
17104 }
17105
17106 Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32(
17107 InFmt_VOP3 *iFmt)
17108 : Inst_VOP3(iFmt, "v_cmp_nlt_f32", true)
17109 {
17110 setFlag(ALU);
17111 setFlag(F32);
17112 } // Inst_VOP3__V_CMP_NLT_F32
17113
17114 Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32()
17115 {
17116 } // ~Inst_VOP3__V_CMP_NLT_F32
17117
17118 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
17119 void
17120 Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
17121 {
17122 Wavefront *wf = gpuDynInst->wavefront();
17123 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17124 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17125 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17126
17127 src0.readSrc();
17128 src1.readSrc();
17129
17130 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17131 if (wf->execMask(lane)) {
17132 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
17133 }
17134 }
17135
17136 sdst.write();
17137 }
17138
17139 Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32(
17140 InFmt_VOP3 *iFmt)
17141 : Inst_VOP3(iFmt, "v_cmp_tru_f32", true)
17142 {
17143 setFlag(ALU);
17144 setFlag(F32);
17145 } // Inst_VOP3__V_CMP_TRU_F32
17146
17147 Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32()
17148 {
17149 } // ~Inst_VOP3__V_CMP_TRU_F32
17150
17151 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
17152 void
17153 Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
17154 {
17155 Wavefront *wf = gpuDynInst->wavefront();
17156 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17157
17158 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17159 if (wf->execMask(lane)) {
17160 sdst.setBit(lane, 1);
17161 }
17162 }
17163
17164 sdst.write();
17165 }
17166
17167 Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32(
17168 InFmt_VOP3 *iFmt)
17169 : Inst_VOP3(iFmt, "v_cmpx_f_f32", true)
17170 {
17171 setFlag(ALU);
17172 setFlag(F32);
17173 } // Inst_VOP3__V_CMPX_F_F32
17174
17175 Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32()
17176 {
17177 } // ~Inst_VOP3__V_CMPX_F_F32
17178
17179 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
17180 void
17181 Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
17182 {
17183 Wavefront *wf = gpuDynInst->wavefront();
17184 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17185
17186 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17187 if (wf->execMask(lane)) {
17188 sdst.setBit(lane, 0);
17189 }
17190 }
17191
17192 wf->execMask() = sdst.rawData();
17193 sdst.write();
17194 }
17195
17196 Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32(
17197 InFmt_VOP3 *iFmt)
17198 : Inst_VOP3(iFmt, "v_cmpx_lt_f32", true)
17199 {
17200 setFlag(ALU);
17201 setFlag(F32);
17202 } // Inst_VOP3__V_CMPX_LT_F32
17203
17204 Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32()
17205 {
17206 } // ~Inst_VOP3__V_CMPX_LT_F32
17207
17208 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
17209 void
17210 Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
17211 {
17212 Wavefront *wf = gpuDynInst->wavefront();
17213 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17214 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17215 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17216
17217 src0.readSrc();
17218 src1.readSrc();
17219
17220 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17221 if (wf->execMask(lane)) {
17222 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
17223 }
17224 }
17225
17226 wf->execMask() = sdst.rawData();
17227 sdst.write();
17228 }
17229
17230 Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32(
17231 InFmt_VOP3 *iFmt)
17232 : Inst_VOP3(iFmt, "v_cmpx_eq_f32", true)
17233 {
17234 setFlag(ALU);
17235 setFlag(F32);
17236 } // Inst_VOP3__V_CMPX_EQ_F32
17237
17238 Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32()
17239 {
17240 } // ~Inst_VOP3__V_CMPX_EQ_F32
17241
17242 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
17243 void
17244 Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
17245 {
17246 Wavefront *wf = gpuDynInst->wavefront();
17247 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17248 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17249 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17250
17251 src0.readSrc();
17252 src1.readSrc();
17253
17254 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17255 if (wf->execMask(lane)) {
17256 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
17257 }
17258 }
17259
17260 wf->execMask() = sdst.rawData();
17261 sdst.write();
17262 }
17263
17264 Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32(
17265 InFmt_VOP3 *iFmt)
17266 : Inst_VOP3(iFmt, "v_cmpx_le_f32", true)
17267 {
17268 setFlag(ALU);
17269 setFlag(F32);
17270 } // Inst_VOP3__V_CMPX_LE_F32
17271
17272 Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32()
17273 {
17274 } // ~Inst_VOP3__V_CMPX_LE_F32
17275
17276 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
17277 void
17278 Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
17279 {
17280 Wavefront *wf = gpuDynInst->wavefront();
17281 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17282 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17283 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17284
17285 src0.readSrc();
17286 src1.readSrc();
17287
17288 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17289 if (wf->execMask(lane)) {
17290 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
17291 }
17292 }
17293
17294 wf->execMask() = sdst.rawData();
17295 sdst.write();
17296 }
17297
17298 Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32(
17299 InFmt_VOP3 *iFmt)
17300 : Inst_VOP3(iFmt, "v_cmpx_gt_f32", true)
17301 {
17302 setFlag(ALU);
17303 setFlag(F32);
17304 } // Inst_VOP3__V_CMPX_GT_F32
17305
17306 Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32()
17307 {
17308 } // ~Inst_VOP3__V_CMPX_GT_F32
17309
17310 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
17311 void
17312 Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
17313 {
17314 Wavefront *wf = gpuDynInst->wavefront();
17315 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17316 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17317 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17318
17319 src0.readSrc();
17320 src1.readSrc();
17321
17322 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17323 if (wf->execMask(lane)) {
17324 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
17325 }
17326 }
17327
17328 wf->execMask() = sdst.rawData();
17329 sdst.write();
17330 }
17331
17332 Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32(
17333 InFmt_VOP3 *iFmt)
17334 : Inst_VOP3(iFmt, "v_cmpx_lg_f32", true)
17335 {
17336 setFlag(ALU);
17337 setFlag(F32);
17338 } // Inst_VOP3__V_CMPX_LG_F32
17339
17340 Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32()
17341 {
17342 } // ~Inst_VOP3__V_CMPX_LG_F32
17343
17344 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
17345 void
17346 Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
17347 {
17348 Wavefront *wf = gpuDynInst->wavefront();
17349 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17350 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17351 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17352
17353 src0.readSrc();
17354 src1.readSrc();
17355
17356 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17357 if (wf->execMask(lane)) {
17358 sdst.setBit(lane, (src0[lane] < src1[lane]
17359 || src0[lane] > src1[lane]) ? 1 : 0);
17360 }
17361 }
17362
17363 wf->execMask() = sdst.rawData();
17364 sdst.write();
17365 }
17366
17367 Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32(
17368 InFmt_VOP3 *iFmt)
17369 : Inst_VOP3(iFmt, "v_cmpx_ge_f32", true)
17370 {
17371 setFlag(ALU);
17372 setFlag(F32);
17373 } // Inst_VOP3__V_CMPX_GE_F32
17374
17375 Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32()
17376 {
17377 } // ~Inst_VOP3__V_CMPX_GE_F32
17378
17379 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
17380 void
17381 Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
17382 {
17383 Wavefront *wf = gpuDynInst->wavefront();
17384 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17385 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17386 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17387
17388 src0.readSrc();
17389 src1.readSrc();
17390
17391 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17392 if (wf->execMask(lane)) {
17393 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
17394 }
17395 }
17396
17397 wf->execMask() = sdst.rawData();
17398 sdst.write();
17399 }
17400
17401 Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32(
17402 InFmt_VOP3 *iFmt)
17403 : Inst_VOP3(iFmt, "v_cmpx_o_f32", true)
17404 {
17405 setFlag(ALU);
17406 setFlag(F32);
17407 } // Inst_VOP3__V_CMPX_O_F32
17408
17409 Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32()
17410 {
17411 } // ~Inst_VOP3__V_CMPX_O_F32
17412
17413 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
17414 // encoding.
17415 void
17416 Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
17417 {
17418 Wavefront *wf = gpuDynInst->wavefront();
17419 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17420 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17421 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17422
17423 src0.readSrc();
17424 src1.readSrc();
17425
17426 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17427 if (wf->execMask(lane)) {
17428 sdst.setBit(lane, (!std::isnan(src0[lane])
17429 && !std::isnan(src1[lane])) ? 1 : 0);
17430 }
17431 }
17432
17433 wf->execMask() = sdst.rawData();
17434 sdst.write();
17435 }
17436
17437 Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32(
17438 InFmt_VOP3 *iFmt)
17439 : Inst_VOP3(iFmt, "v_cmpx_u_f32", true)
17440 {
17441 setFlag(ALU);
17442 setFlag(F32);
17443 } // Inst_VOP3__V_CMPX_U_F32
17444
17445 Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32()
17446 {
17447 } // ~Inst_VOP3__V_CMPX_U_F32
17448
17449 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
17450 // encoding.
17451 void
17452 Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
17453 {
17454 Wavefront *wf = gpuDynInst->wavefront();
17455 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17456 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17457 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17458
17459 src0.readSrc();
17460 src1.readSrc();
17461
17462 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17463 if (wf->execMask(lane)) {
17464 sdst.setBit(lane, (std::isnan(src0[lane])
17465 || std::isnan(src1[lane])) ? 1 : 0);
17466 }
17467 }
17468
17469 wf->execMask() = sdst.rawData();
17470 sdst.write();
17471 }
17472
17473 Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32(
17474 InFmt_VOP3 *iFmt)
17475 : Inst_VOP3(iFmt, "v_cmpx_nge_f32", true)
17476 {
17477 setFlag(ALU);
17478 setFlag(F32);
17479 } // Inst_VOP3__V_CMPX_NGE_F32
17480
17481 Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32()
17482 {
17483 } // ~Inst_VOP3__V_CMPX_NGE_F32
17484
17485 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
17486 void
17487 Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
17488 {
17489 Wavefront *wf = gpuDynInst->wavefront();
17490 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17491 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17492 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17493
17494 src0.readSrc();
17495 src1.readSrc();
17496
17497 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17498 if (wf->execMask(lane)) {
17499 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
17500 }
17501 }
17502
17503 wf->execMask() = sdst.rawData();
17504 sdst.write();
17505 }
17506
17507 Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32(
17508 InFmt_VOP3 *iFmt)
17509 : Inst_VOP3(iFmt, "v_cmpx_nlg_f32", true)
17510 {
17511 setFlag(ALU);
17512 setFlag(F32);
17513 } // Inst_VOP3__V_CMPX_NLG_F32
17514
17515 Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32()
17516 {
17517 } // ~Inst_VOP3__V_CMPX_NLG_F32
17518
17519 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
17520 void
17521 Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
17522 {
17523 Wavefront *wf = gpuDynInst->wavefront();
17524 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17525 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17526 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17527
17528 src0.readSrc();
17529 src1.readSrc();
17530
17531 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17532 if (wf->execMask(lane)) {
17533 sdst.setBit(lane, !(src0[lane] < src1[lane]
17534 || src0[lane] > src1[lane]) ? 1 : 0);
17535 }
17536 }
17537
17538 wf->execMask() = sdst.rawData();
17539 sdst.write();
17540 }
17541
17542 Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32(
17543 InFmt_VOP3 *iFmt)
17544 : Inst_VOP3(iFmt, "v_cmpx_ngt_f32", true)
17545 {
17546 setFlag(ALU);
17547 setFlag(F32);
17548 } // Inst_VOP3__V_CMPX_NGT_F32
17549
17550 Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32()
17551 {
17552 } // ~Inst_VOP3__V_CMPX_NGT_F32
17553
17554 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
17555 void
17556 Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
17557 {
17558 Wavefront *wf = gpuDynInst->wavefront();
17559 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17560 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17561 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17562
17563 src0.readSrc();
17564 src1.readSrc();
17565
17566 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17567 if (wf->execMask(lane)) {
17568 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
17569 }
17570 }
17571
17572 wf->execMask() = sdst.rawData();
17573 sdst.write();
17574 }
17575
17576 Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32(
17577 InFmt_VOP3 *iFmt)
17578 : Inst_VOP3(iFmt, "v_cmpx_nle_f32", true)
17579 {
17580 setFlag(ALU);
17581 setFlag(F32);
17582 } // Inst_VOP3__V_CMPX_NLE_F32
17583
17584 Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32()
17585 {
17586 } // ~Inst_VOP3__V_CMPX_NLE_F32
17587
17588 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
17589 void
17590 Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
17591 {
17592 Wavefront *wf = gpuDynInst->wavefront();
17593 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17594 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17595 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17596
17597 src0.readSrc();
17598 src1.readSrc();
17599
17600 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17601 if (wf->execMask(lane)) {
17602 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
17603 }
17604 }
17605
17606 wf->execMask() = sdst.rawData();
17607 sdst.write();
17608 }
17609
17610 Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32(
17611 InFmt_VOP3 *iFmt)
17612 : Inst_VOP3(iFmt, "v_cmpx_neq_f32", true)
17613 {
17614 setFlag(ALU);
17615 setFlag(F32);
17616 } // Inst_VOP3__V_CMPX_NEQ_F32
17617
17618 Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32()
17619 {
17620 } // ~Inst_VOP3__V_CMPX_NEQ_F32
17621
17622 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
17623 void
17624 Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
17625 {
17626 Wavefront *wf = gpuDynInst->wavefront();
17627 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17628 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17629 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17630
17631 src0.readSrc();
17632 src1.readSrc();
17633
17634 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17635 if (wf->execMask(lane)) {
17636 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
17637 }
17638 }
17639
17640 wf->execMask() = sdst.rawData();
17641 sdst.write();
17642 }
17643
17644 Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32(
17645 InFmt_VOP3 *iFmt)
17646 : Inst_VOP3(iFmt, "v_cmpx_nlt_f32", true)
17647 {
17648 setFlag(ALU);
17649 setFlag(F32);
17650 } // Inst_VOP3__V_CMPX_NLT_F32
17651
17652 Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32()
17653 {
17654 } // ~Inst_VOP3__V_CMPX_NLT_F32
17655
17656 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
17657 void
17658 Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
17659 {
17660 Wavefront *wf = gpuDynInst->wavefront();
17661 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17662 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17663 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17664
17665 src0.readSrc();
17666 src1.readSrc();
17667
17668 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17669 if (wf->execMask(lane)) {
17670 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
17671 }
17672 }
17673
17674 wf->execMask() = sdst.rawData();
17675 sdst.write();
17676 }
17677
17678 Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32(
17679 InFmt_VOP3 *iFmt)
17680 : Inst_VOP3(iFmt, "v_cmpx_tru_f32", true)
17681 {
17682 setFlag(ALU);
17683 setFlag(F32);
17684 } // Inst_VOP3__V_CMPX_TRU_F32
17685
17686 Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32()
17687 {
17688 } // ~Inst_VOP3__V_CMPX_TRU_F32
17689
17690 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
17691 void
17692 Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
17693 {
17694 Wavefront *wf = gpuDynInst->wavefront();
17695 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17696
17697 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17698 if (wf->execMask(lane)) {
17699 sdst.setBit(lane, 1);
17700 }
17701 }
17702
17703 wf->execMask() = sdst.rawData();
17704 sdst.write();
17705 }
17706
17707 Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3 *iFmt)
17708 : Inst_VOP3(iFmt, "v_cmp_f_f64", true)
17709 {
17710 setFlag(ALU);
17711 setFlag(F64);
17712 } // Inst_VOP3__V_CMP_F_F64
17713
17714 Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64()
17715 {
17716 } // ~Inst_VOP3__V_CMP_F_F64
17717
17718 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
17719 void
17720 Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
17721 {
17722 Wavefront *wf = gpuDynInst->wavefront();
17723 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17724
17725 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17726 if (wf->execMask(lane)) {
17727 sdst.setBit(lane, 0);
17728 }
17729 }
17730
17731 sdst.write();
17732 }
17733
17734 Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64(
17735 InFmt_VOP3 *iFmt)
17736 : Inst_VOP3(iFmt, "v_cmp_lt_f64", true)
17737 {
17738 setFlag(ALU);
17739 setFlag(F64);
17740 } // Inst_VOP3__V_CMP_LT_F64
17741
17742 Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64()
17743 {
17744 } // ~Inst_VOP3__V_CMP_LT_F64
17745
17746 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
17747 void
17748 Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
17749 {
17750 Wavefront *wf = gpuDynInst->wavefront();
17751 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17752 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17753 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17754
17755 src0.readSrc();
17756 src1.readSrc();
17757
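// ABS/NEG bit 0 modifies src0 and bit 1 modifies src1; bit 2 would
// select a third source, which compares do not have (see the
// asserts below).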
17758 if (instData.ABS & 0x1) {
17759 src0.absModifier();
17760 }
17761
17762 if (instData.ABS & 0x2) {
17763 src1.absModifier();
17764 }
17765
17766 if (extData.NEG & 0x1) {
17767 src0.negModifier();
17768 }
17769
17770 if (extData.NEG & 0x2) {
17771 src1.negModifier();
17772 }
17773
17774 /**
17775 * input modifiers are supported by FP operations only
17776 */
17777 assert(!(instData.ABS & 0x4));
17778 assert(!(extData.NEG & 0x4));
17779
17780 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17781 if (wf->execMask(lane)) {
17782 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
17783 }
17784 }
17785
17786 sdst.write();
17787 }
17788
17789 Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64(
17790 InFmt_VOP3 *iFmt)
17791 : Inst_VOP3(iFmt, "v_cmp_eq_f64", true)
17792 {
17793 setFlag(ALU);
17794 setFlag(F64);
17795 } // Inst_VOP3__V_CMP_EQ_F64
17796
17797 Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64()
17798 {
17799 } // ~Inst_VOP3__V_CMP_EQ_F64
17800
17801 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
17802 void
17803 Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
17804 {
17805 Wavefront *wf = gpuDynInst->wavefront();
17806 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17807 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17808 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17809
17810 src0.readSrc();
17811 src1.readSrc();
17812
17813 if (instData.ABS & 0x1) {
17814 src0.absModifier();
17815 }
17816
17817 if (instData.ABS & 0x2) {
17818 src1.absModifier();
17819 }
17820
17821 if (extData.NEG & 0x1) {
17822 src0.negModifier();
17823 }
17824
17825 if (extData.NEG & 0x2) {
17826 src1.negModifier();
17827 }
17828
17829 /**
17830 * input modifiers are supported by FP operations only
17831 */
17832 assert(!(instData.ABS & 0x4));
17833 assert(!(extData.NEG & 0x4));
17834
17835 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17836 if (wf->execMask(lane)) {
17837 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
17838 }
17839 }
17840
17841 sdst.write();
17842 }
17843
17844 Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64(
17845 InFmt_VOP3 *iFmt)
17846 : Inst_VOP3(iFmt, "v_cmp_le_f64", true)
17847 {
17848 setFlag(ALU);
17849 setFlag(F64);
17850 } // Inst_VOP3__V_CMP_LE_F64
17851
17852 Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64()
17853 {
17854 } // ~Inst_VOP3__V_CMP_LE_F64
17855
17856 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
17857 void
17858 Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
17859 {
17860 Wavefront *wf = gpuDynInst->wavefront();
17861 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17862 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17863 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17864
17865 src0.readSrc();
17866 src1.readSrc();
17867
17868 if (instData.ABS & 0x1) {
17869 src0.absModifier();
17870 }
17871
17872 if (instData.ABS & 0x2) {
17873 src1.absModifier();
17874 }
17875
17876 if (extData.NEG & 0x1) {
17877 src0.negModifier();
17878 }
17879
17880 if (extData.NEG & 0x2) {
17881 src1.negModifier();
17882 }
17883
17884 /**
17885 * input modifiers are supported by FP operations only
17886 */
17887 assert(!(instData.ABS & 0x4));
17888 assert(!(extData.NEG & 0x4));
17889
17890 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17891 if (wf->execMask(lane)) {
17892 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
17893 }
17894 }
17895
17896 sdst.write();
17897 }
17898
17899 Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64(
17900 InFmt_VOP3 *iFmt)
17901 : Inst_VOP3(iFmt, "v_cmp_gt_f64", true)
17902 {
17903 setFlag(ALU);
17904 setFlag(F64);
17905 } // Inst_VOP3__V_CMP_GT_F64
17906
17907 Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64()
17908 {
17909 } // ~Inst_VOP3__V_CMP_GT_F64
17910
17911 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
17912 void
17913 Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
17914 {
17915 Wavefront *wf = gpuDynInst->wavefront();
17916 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17917 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17918 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17919
17920 src0.readSrc();
17921 src1.readSrc();
17922
17923 if (instData.ABS & 0x1) {
17924 src0.absModifier();
17925 }
17926
17927 if (instData.ABS & 0x2) {
17928 src1.absModifier();
17929 }
17930
17931 if (extData.NEG & 0x1) {
17932 src0.negModifier();
17933 }
17934
17935 if (extData.NEG & 0x2) {
17936 src1.negModifier();
17937 }
17938
17939 /**
17940 * input modifiers are supported by FP operations only
17941 */
17942 assert(!(instData.ABS & 0x4));
17943 assert(!(extData.NEG & 0x4));
17944
17945 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17946 if (wf->execMask(lane)) {
17947 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
17948 }
17949 }
17950
17951 sdst.write();
17952 }
17953
17954 Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64(
17955 InFmt_VOP3 *iFmt)
17956 : Inst_VOP3(iFmt, "v_cmp_lg_f64", true)
17957 {
17958 setFlag(ALU);
17959 setFlag(F64);
17960 } // Inst_VOP3__V_CMP_LG_F64
17961
17962 Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64()
17963 {
17964 } // ~Inst_VOP3__V_CMP_LG_F64
17965
17966 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
17967 void
17968 Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
17969 {
17970 Wavefront *wf = gpuDynInst->wavefront();
17971 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17972 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17973 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17974
17975 src0.readSrc();
17976 src1.readSrc();
17977
17978 if (instData.ABS & 0x1) {
17979 src0.absModifier();
17980 }
17981
17982 if (instData.ABS & 0x2) {
17983 src1.absModifier();
17984 }
17985
17986 if (extData.NEG & 0x1) {
17987 src0.negModifier();
17988 }
17989
17990 if (extData.NEG & 0x2) {
17991 src1.negModifier();
17992 }
17993
17994 /**
17995 * input modifiers are supported by FP operations only
17996 */
17997 assert(!(instData.ABS & 0x4));
17998 assert(!(extData.NEG & 0x4));
17999
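            // LG ("less than or greater than") is the ordered not-equal:
            // (a < b || a > b) is false whenever either input is NaN,
            // whereas v_cmp_neq_f64 below uses != and yields true for
            // NaN inputs.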
18000 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18001 if (wf->execMask(lane)) {
18002 sdst.setBit(lane, (src0[lane] < src1[lane]
18003 || src0[lane] > src1[lane]) ? 1 : 0);
18004 }
18005 }
18006
18007 sdst.write();
18008 }
18009
18010 Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64(
18011 InFmt_VOP3 *iFmt)
18012 : Inst_VOP3(iFmt, "v_cmp_ge_f64", true)
18013 {
18014 setFlag(ALU);
18015 setFlag(F64);
18016 } // Inst_VOP3__V_CMP_GE_F64
18017
18018 Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64()
18019 {
18020 } // ~Inst_VOP3__V_CMP_GE_F64
18021
18022 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
18023 void
18024 Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
18025 {
18026 Wavefront *wf = gpuDynInst->wavefront();
18027 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18028 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18029 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18030
18031 src0.readSrc();
18032 src1.readSrc();
18033
18034 if (instData.ABS & 0x1) {
18035 src0.absModifier();
18036 }
18037
18038 if (instData.ABS & 0x2) {
18039 src1.absModifier();
18040 }
18041
18042 if (extData.NEG & 0x1) {
18043 src0.negModifier();
18044 }
18045
18046 if (extData.NEG & 0x2) {
18047 src1.negModifier();
18048 }
18049
18050 /**
18051 * input modifiers are supported by FP operations only
18052 */
18053 assert(!(instData.ABS & 0x4));
18054 assert(!(extData.NEG & 0x4));
18055
18056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18057 if (wf->execMask(lane)) {
18058 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
18059 }
18060 }
18061
18062 sdst.write();
18063 }
18064
18065 Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3 *iFmt)
18066 : Inst_VOP3(iFmt, "v_cmp_o_f64", true)
18067 {
18068 setFlag(ALU);
18069 setFlag(F64);
18070 } // Inst_VOP3__V_CMP_O_F64
18071
18072 Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64()
18073 {
18074 } // ~Inst_VOP3__V_CMP_O_F64
18075
18076 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
18077 void
18078 Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
18079 {
18080 Wavefront *wf = gpuDynInst->wavefront();
18081 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18082 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18083 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18084
18085 src0.readSrc();
18086 src1.readSrc();
18087
18088 if (instData.ABS & 0x1) {
18089 src0.absModifier();
18090 }
18091
18092 if (instData.ABS & 0x2) {
18093 src1.absModifier();
18094 }
18095
18096 if (extData.NEG & 0x1) {
18097 src0.negModifier();
18098 }
18099
18100 if (extData.NEG & 0x2) {
18101 src1.negModifier();
18102 }
18103
18104 /**
18105 * input modifiers are supported by FP operations only
18106 */
18107 assert(!(instData.ABS & 0x4));
18108 assert(!(extData.NEG & 0x4));
18109
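            // "Ordered" holds only when neither input is NaN;
            // v_cmp_u_f64 below computes the complementary "unordered"
            // predicate, so any pair of inputs satisfies exactly one of
            // the two.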
18110 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18111 if (wf->execMask(lane)) {
18112 sdst.setBit(lane, (!std::isnan(src0[lane])
18113 && !std::isnan(src1[lane])) ? 1 : 0);
18114 }
18115 }
18116
18117 sdst.write();
18118 }
18119
18120 Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3 *iFmt)
18121 : Inst_VOP3(iFmt, "v_cmp_u_f64", true)
18122 {
18123 setFlag(ALU);
18124 setFlag(F64);
18125 } // Inst_VOP3__V_CMP_U_F64
18126
18127 Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64()
18128 {
18129 } // ~Inst_VOP3__V_CMP_U_F64
18130
18131 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
18132 void
18133 Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
18134 {
18135 Wavefront *wf = gpuDynInst->wavefront();
18136 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18137 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18138 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18139
18140 src0.readSrc();
18141 src1.readSrc();
18142
18143 if (instData.ABS & 0x1) {
18144 src0.absModifier();
18145 }
18146
18147 if (instData.ABS & 0x2) {
18148 src1.absModifier();
18149 }
18150
18151 if (extData.NEG & 0x1) {
18152 src0.negModifier();
18153 }
18154
18155 if (extData.NEG & 0x2) {
18156 src1.negModifier();
18157 }
18158
18159 /**
18160 * input modifiers are supported by FP operations only
18161 */
18162 assert(!(instData.ABS & 0x4));
18163 assert(!(extData.NEG & 0x4));
18164
18165 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18166 if (wf->execMask(lane)) {
18167 sdst.setBit(lane, (std::isnan(src0[lane])
18168 || std::isnan(src1[lane])) ? 1 : 0);
18169 }
18170 }
18171
18172 sdst.write();
18173 }
18174
18175 Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64(
18176 InFmt_VOP3 *iFmt)
18177 : Inst_VOP3(iFmt, "v_cmp_nge_f64", true)
18178 {
18179 setFlag(ALU);
18180 setFlag(F64);
18181 } // Inst_VOP3__V_CMP_NGE_F64
18182
18183 Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64()
18184 {
18185 } // ~Inst_VOP3__V_CMP_NGE_F64
18186
18187 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
18188 void
18189 Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
18190 {
18191 Wavefront *wf = gpuDynInst->wavefront();
18192 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18193 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18194 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18195
18196 src0.readSrc();
18197 src1.readSrc();
18198
18199 if (instData.ABS & 0x1) {
18200 src0.absModifier();
18201 }
18202
18203 if (instData.ABS & 0x2) {
18204 src1.absModifier();
18205 }
18206
18207 if (extData.NEG & 0x1) {
18208 src0.negModifier();
18209 }
18210
18211 if (extData.NEG & 0x2) {
18212 src1.negModifier();
18213 }
18214
18215 /**
18216 * input modifiers are supported by FP operations only
18217 */
18218 assert(!(instData.ABS & 0x4));
18219 assert(!(extData.NEG & 0x4));
18220
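            // The negated compares differ from their positive
            // counterparts only on NaNs: !(a >= b) is true when either
            // input is NaN, while the plain (a < b) would be false, so
            // NGE reads as "less than or unordered".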
18221 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18222 if (wf->execMask(lane)) {
18223 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
18224 }
18225 }
18226
18227 sdst.write();
18228 }
18229
18230 Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64(
18231 InFmt_VOP3 *iFmt)
18232 : Inst_VOP3(iFmt, "v_cmp_nlg_f64", true)
18233 {
18234 setFlag(ALU);
18235 setFlag(F64);
18236 } // Inst_VOP3__V_CMP_NLG_F64
18237
18238 Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64()
18239 {
18240 } // ~Inst_VOP3__V_CMP_NLG_F64
18241
18242 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
18243 void
18244 Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
18245 {
18246 Wavefront *wf = gpuDynInst->wavefront();
18247 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18248 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18249 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18250
18251 src0.readSrc();
18252 src1.readSrc();
18253
18254 if (instData.ABS & 0x1) {
18255 src0.absModifier();
18256 }
18257
18258 if (instData.ABS & 0x2) {
18259 src1.absModifier();
18260 }
18261
18262 if (extData.NEG & 0x1) {
18263 src0.negModifier();
18264 }
18265
18266 if (extData.NEG & 0x2) {
18267 src1.negModifier();
18268 }
18269
18270 /**
18271 * input modifiers are supported by FP operations only
18272 */
18273 assert(!(instData.ABS & 0x4));
18274 assert(!(extData.NEG & 0x4));
18275
18276 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18277 if (wf->execMask(lane)) {
18278 sdst.setBit(lane, !(src0[lane] < src1[lane]
18279 || src0[lane] > src1[lane]) ? 1 : 0);
18280 }
18281 }
18282
18283 sdst.write();
18284 }
18285
18286 Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64(
18287 InFmt_VOP3 *iFmt)
18288 : Inst_VOP3(iFmt, "v_cmp_ngt_f64", true)
18289 {
18290 setFlag(ALU);
18291 setFlag(F64);
18292 } // Inst_VOP3__V_CMP_NGT_F64
18293
18294 Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64()
18295 {
18296 } // ~Inst_VOP3__V_CMP_NGT_F64
18297
18298 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
18299 void
18300 Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
18301 {
18302 Wavefront *wf = gpuDynInst->wavefront();
18303 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18304 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18305 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18306
18307 src0.readSrc();
18308 src1.readSrc();
18309
18310 if (instData.ABS & 0x1) {
18311 src0.absModifier();
18312 }
18313
18314 if (instData.ABS & 0x2) {
18315 src1.absModifier();
18316 }
18317
18318 if (extData.NEG & 0x1) {
18319 src0.negModifier();
18320 }
18321
18322 if (extData.NEG & 0x2) {
18323 src1.negModifier();
18324 }
18325
18326 /**
18327 * input modifiers are supported by FP operations only
18328 */
18329 assert(!(instData.ABS & 0x4));
18330 assert(!(extData.NEG & 0x4));
18331
18332 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18333 if (wf->execMask(lane)) {
18334 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
18335 }
18336 }
18337
18338 sdst.write();
18339 }
18340
18341 Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64(
18342 InFmt_VOP3 *iFmt)
18343 : Inst_VOP3(iFmt, "v_cmp_nle_f64", true)
18344 {
18345 setFlag(ALU);
18346 setFlag(F64);
18347 } // Inst_VOP3__V_CMP_NLE_F64
18348
18349 Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64()
18350 {
18351 } // ~Inst_VOP3__V_CMP_NLE_F64
18352
18353 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
18354 void
18355 Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
18356 {
18357 Wavefront *wf = gpuDynInst->wavefront();
18358 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18359 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18360 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18361
18362 src0.readSrc();
18363 src1.readSrc();
18364
18365 if (instData.ABS & 0x1) {
18366 src0.absModifier();
18367 }
18368
18369 if (instData.ABS & 0x2) {
18370 src1.absModifier();
18371 }
18372
18373 if (extData.NEG & 0x1) {
18374 src0.negModifier();
18375 }
18376
18377 if (extData.NEG & 0x2) {
18378 src1.negModifier();
18379 }
18380
18381 /**
18382 * input modifiers are supported by FP operations only
18383 */
18384 assert(!(instData.ABS & 0x4));
18385 assert(!(extData.NEG & 0x4));
18386
18387 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18388 if (wf->execMask(lane)) {
18389 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
18390 }
18391 }
18392
18393 sdst.write();
18394 }
18395
18396 Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64(
18397 InFmt_VOP3 *iFmt)
18398 : Inst_VOP3(iFmt, "v_cmp_neq_f64", true)
18399 {
18400 setFlag(ALU);
18401 setFlag(F64);
18402 } // Inst_VOP3__V_CMP_NEQ_F64
18403
18404 Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64()
18405 {
18406 } // ~Inst_VOP3__V_CMP_NEQ_F64
18407
18408 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
18409 void
18410 Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
18411 {
18412 Wavefront *wf = gpuDynInst->wavefront();
18413 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18414 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18415 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18416
18417 src0.readSrc();
18418 src1.readSrc();
18419
18420 if (instData.ABS & 0x1) {
18421 src0.absModifier();
18422 }
18423
18424 if (instData.ABS & 0x2) {
18425 src1.absModifier();
18426 }
18427
18428 if (extData.NEG & 0x1) {
18429 src0.negModifier();
18430 }
18431
18432 if (extData.NEG & 0x2) {
18433 src1.negModifier();
18434 }
18435
18436 /**
18437 * input modifiers are supported by FP operations only
18438 */
18439 assert(!(instData.ABS & 0x4));
18440 assert(!(extData.NEG & 0x4));
18441
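            // IEEE != is "unordered or unequal", so it matches the
            // documented !(S0 == S1) bit for bit, NaN inputs included.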
18442 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18443 if (wf->execMask(lane)) {
18444 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
18445 }
18446 }
18447
18448 sdst.write();
18449 }
18450
18451 Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64(
18452 InFmt_VOP3 *iFmt)
18453 : Inst_VOP3(iFmt, "v_cmp_nlt_f64", true)
18454 {
18455 setFlag(ALU);
18456 setFlag(F64);
18457 } // Inst_VOP3__V_CMP_NLT_F64
18458
18459 Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64()
18460 {
18461 } // ~Inst_VOP3__V_CMP_NLT_F64
18462
18463 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
18464 void
18465 Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
18466 {
18467 Wavefront *wf = gpuDynInst->wavefront();
18468 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18469 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18470 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18471
18472 src0.readSrc();
18473 src1.readSrc();
18474
18475 if (instData.ABS & 0x1) {
18476 src0.absModifier();
18477 }
18478
18479 if (instData.ABS & 0x2) {
18480 src1.absModifier();
18481 }
18482
18483 if (extData.NEG & 0x1) {
18484 src0.negModifier();
18485 }
18486
18487 if (extData.NEG & 0x2) {
18488 src1.negModifier();
18489 }
18490
18491 /**
18492 * input modifiers are supported by FP operations only
18493 */
18494 assert(!(instData.ABS & 0x4));
18495 assert(!(extData.NEG & 0x4));
18496
18497 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18498 if (wf->execMask(lane)) {
18499 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
18500 }
18501 }
18502
18503 sdst.write();
18504 }
18505
18506 Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64(
18507 InFmt_VOP3 *iFmt)
18508 : Inst_VOP3(iFmt, "v_cmp_tru_f64", true)
18509 {
18510 setFlag(ALU);
18511 setFlag(F64);
18512 } // Inst_VOP3__V_CMP_TRU_F64
18513
18514 Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64()
18515 {
18516 } // ~Inst_VOP3__V_CMP_TRU_F64
18517
18518 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
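    // TRU is the trivial always-true predicate (the F forms are its
    // always-false complement): each active lane's bit is set to 1 and
    // the bits of inactive lanes are left untouched.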
18519 void
18520 Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
18521 {
18522 Wavefront *wf = gpuDynInst->wavefront();
18523 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18524
18525 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18526 if (wf->execMask(lane)) {
18527 sdst.setBit(lane, 1);
18528 }
18529 }
18530
18531 sdst.write();
18532 }
18533
18534 Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64(
18535 InFmt_VOP3 *iFmt)
18536 : Inst_VOP3(iFmt, "v_cmpx_f_f64", true)
18537 {
18538 setFlag(ALU);
18539 setFlag(F64);
18540 } // Inst_VOP3__V_CMPX_F_F64
18541
18542 Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64()
18543 {
18544 } // ~Inst_VOP3__V_CMPX_F_F64
18545
18546 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
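    // The CMPX forms write the comparison mask to EXEC as well as to the
    // destination, so subsequent vector instructions execute only in the
    // lanes where the compare held.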
18547 void
18548 Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
18549 {
18550 Wavefront *wf = gpuDynInst->wavefront();
18551 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18552
18553 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18554 if (wf->execMask(lane)) {
18555 sdst.setBit(lane, 0);
18556 }
18557 }
18558
18559 wf->execMask() = sdst.rawData();
18560 sdst.write();
18561 }
18562
18563 Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64(
18564 InFmt_VOP3 *iFmt)
18565 : Inst_VOP3(iFmt, "v_cmpx_lt_f64", true)
18566 {
18567 setFlag(ALU);
18568 setFlag(F64);
18569 } // Inst_VOP3__V_CMPX_LT_F64
18570
18571 Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64()
18572 {
18573 } // ~Inst_VOP3__V_CMPX_LT_F64
18574
18575 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
18576 void
18577 Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
18578 {
18579 Wavefront *wf = gpuDynInst->wavefront();
18580 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18581 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18582 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18583
18584 src0.readSrc();
18585 src1.readSrc();
18586
18587 if (instData.ABS & 0x1) {
18588 src0.absModifier();
18589 }
18590
18591 if (instData.ABS & 0x2) {
18592 src1.absModifier();
18593 }
18594
18595 if (extData.NEG & 0x1) {
18596 src0.negModifier();
18597 }
18598
18599 if (extData.NEG & 0x2) {
18600 src1.negModifier();
18601 }
18602
18603 /**
18604 * input modifiers are supported by FP operations only
18605 */
18606 assert(!(instData.ABS & 0x4));
18607 assert(!(extData.NEG & 0x4));
18608
18609 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18610 if (wf->execMask(lane)) {
18611 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
18612 }
18613 }
18614
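            // Only active lanes' bits were set in the loop above; the
            // bits of lanes already masked off keep whatever the freshly
            // constructed operand held, which appears to assume the
            // operand's backing storage starts out zeroed.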
18615 wf->execMask() = sdst.rawData();
18616 sdst.write();
18617 }
18618
18619 Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64(
18620 InFmt_VOP3 *iFmt)
18621 : Inst_VOP3(iFmt, "v_cmpx_eq_f64", true)
18622 {
18623 setFlag(ALU);
18624 setFlag(F64);
18625 } // Inst_VOP3__V_CMPX_EQ_F64
18626
18627 Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64()
18628 {
18629 } // ~Inst_VOP3__V_CMPX_EQ_F64
18630
18631 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
18632 void
18633 Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
18634 {
18635 Wavefront *wf = gpuDynInst->wavefront();
18636 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18637 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18638 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18639
18640 src0.readSrc();
18641 src1.readSrc();
18642
18643 if (instData.ABS & 0x1) {
18644 src0.absModifier();
18645 }
18646
18647 if (instData.ABS & 0x2) {
18648 src1.absModifier();
18649 }
18650
18651 if (extData.NEG & 0x1) {
18652 src0.negModifier();
18653 }
18654
18655 if (extData.NEG & 0x2) {
18656 src1.negModifier();
18657 }
18658
18659 /**
18660 * input modifiers are supported by FP operations only
18661 */
18662 assert(!(instData.ABS & 0x4));
18663 assert(!(extData.NEG & 0x4));
18664
18665 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18666 if (wf->execMask(lane)) {
18667 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
18668 }
18669 }
18670
18671 wf->execMask() = sdst.rawData();
18672 sdst.write();
18673 }
18674
18675 Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64(
18676 InFmt_VOP3 *iFmt)
18677 : Inst_VOP3(iFmt, "v_cmpx_le_f64", true)
18678 {
18679 setFlag(ALU);
18680 setFlag(F64);
18681 } // Inst_VOP3__V_CMPX_LE_F64
18682
18683 Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64()
18684 {
18685 } // ~Inst_VOP3__V_CMPX_LE_F64
18686
18687 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
18688 void
18689 Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
18690 {
18691 Wavefront *wf = gpuDynInst->wavefront();
18692 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18693 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18694 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18695
18696 src0.readSrc();
18697 src1.readSrc();
18698
18699 if (instData.ABS & 0x1) {
18700 src0.absModifier();
18701 }
18702
18703 if (instData.ABS & 0x2) {
18704 src1.absModifier();
18705 }
18706
18707 if (extData.NEG & 0x1) {
18708 src0.negModifier();
18709 }
18710
18711 if (extData.NEG & 0x2) {
18712 src1.negModifier();
18713 }
18714
18715 /**
18716 * input modifiers are supported by FP operations only
18717 */
18718 assert(!(instData.ABS & 0x4));
18719 assert(!(extData.NEG & 0x4));
18720
18721 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18722 if (wf->execMask(lane)) {
18723 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
18724 }
18725 }
18726
18727 wf->execMask() = sdst.rawData();
18728 sdst.write();
18729 }
18730
18731 Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64(
18732 InFmt_VOP3 *iFmt)
18733 : Inst_VOP3(iFmt, "v_cmpx_gt_f64", true)
18734 {
18735 setFlag(ALU);
18736 setFlag(F64);
18737 } // Inst_VOP3__V_CMPX_GT_F64
18738
18739 Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64()
18740 {
18741 } // ~Inst_VOP3__V_CMPX_GT_F64
18742
18743 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
18744 void
18745 Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
18746 {
18747 Wavefront *wf = gpuDynInst->wavefront();
18748 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18749 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18750 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18751
18752 src0.readSrc();
18753 src1.readSrc();
18754
18755 if (instData.ABS & 0x1) {
18756 src0.absModifier();
18757 }
18758
18759 if (instData.ABS & 0x2) {
18760 src1.absModifier();
18761 }
18762
18763 if (extData.NEG & 0x1) {
18764 src0.negModifier();
18765 }
18766
18767 if (extData.NEG & 0x2) {
18768 src1.negModifier();
18769 }
18770
18771 /**
18772 * input modifiers are supported by FP operations only
18773 */
18774 assert(!(instData.ABS & 0x4));
18775 assert(!(extData.NEG & 0x4));
18776
18777 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18778 if (wf->execMask(lane)) {
18779 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
18780 }
18781 }
18782
18783 wf->execMask() = sdst.rawData();
18784 sdst.write();
18785 }
18786
18787 Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64(
18788 InFmt_VOP3 *iFmt)
18789 : Inst_VOP3(iFmt, "v_cmpx_lg_f64", true)
18790 {
18791 setFlag(ALU);
18792 setFlag(F64);
18793 } // Inst_VOP3__V_CMPX_LG_F64
18794
18795 Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64()
18796 {
18797 } // ~Inst_VOP3__V_CMPX_LG_F64
18798
18799 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
18800 void
18801 Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
18802 {
18803 Wavefront *wf = gpuDynInst->wavefront();
18804 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18805 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18806 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18807
18808 src0.readSrc();
18809 src1.readSrc();
18810
18811 if (instData.ABS & 0x1) {
18812 src0.absModifier();
18813 }
18814
18815 if (instData.ABS & 0x2) {
18816 src1.absModifier();
18817 }
18818
18819 if (extData.NEG & 0x1) {
18820 src0.negModifier();
18821 }
18822
18823 if (extData.NEG & 0x2) {
18824 src1.negModifier();
18825 }
18826
18827 /**
18828 * input modifiers are supported by FP operations only
18829 */
18830 assert(!(instData.ABS & 0x4));
18831 assert(!(extData.NEG & 0x4));
18832
18833 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18834 if (wf->execMask(lane)) {
18835 sdst.setBit(lane, (src0[lane] < src1[lane]
18836 || src0[lane] > src1[lane]) ? 1 : 0);
18837 }
18838 }
18839
18840 wf->execMask() = sdst.rawData();
18841 sdst.write();
18842 }
18843
18844 Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64(
18845 InFmt_VOP3 *iFmt)
18846 : Inst_VOP3(iFmt, "v_cmpx_ge_f64", true)
18847 {
18848 setFlag(ALU);
18849 setFlag(F64);
18850 } // Inst_VOP3__V_CMPX_GE_F64
18851
18852 Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64()
18853 {
18854 } // ~Inst_VOP3__V_CMPX_GE_F64
18855
18856 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
18857 void
18858 Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
18859 {
18860 Wavefront *wf = gpuDynInst->wavefront();
18861 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18862 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18863 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18864
18865 src0.readSrc();
18866 src1.readSrc();
18867
18868 if (instData.ABS & 0x1) {
18869 src0.absModifier();
18870 }
18871
18872 if (instData.ABS & 0x2) {
18873 src1.absModifier();
18874 }
18875
18876 if (extData.NEG & 0x1) {
18877 src0.negModifier();
18878 }
18879
18880 if (extData.NEG & 0x2) {
18881 src1.negModifier();
18882 }
18883
18884 /**
18885 * input modifiers are supported by FP operations only
18886 */
18887 assert(!(instData.ABS & 0x4));
18888 assert(!(extData.NEG & 0x4));
18889
18890 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18891 if (wf->execMask(lane)) {
18892 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
18893 }
18894 }
18895
18896 wf->execMask() = sdst.rawData();
18897 sdst.write();
18898 }
18899
18900 Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64(
18901 InFmt_VOP3 *iFmt)
18902 : Inst_VOP3(iFmt, "v_cmpx_o_f64", true)
18903 {
18904 setFlag(ALU);
18905 setFlag(F64);
18906 } // Inst_VOP3__V_CMPX_O_F64
18907
18908 Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64()
18909 {
18910 } // ~Inst_VOP3__V_CMPX_O_F64
18911
18912 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
18913 // encoding.
18914 void
18915 Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
18916 {
18917 Wavefront *wf = gpuDynInst->wavefront();
18918 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18919 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18920 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18921
18922 src0.readSrc();
18923 src1.readSrc();
18924
18925 if (instData.ABS & 0x1) {
18926 src0.absModifier();
18927 }
18928
18929 if (instData.ABS & 0x2) {
18930 src1.absModifier();
18931 }
18932
18933 if (extData.NEG & 0x1) {
18934 src0.negModifier();
18935 }
18936
18937 if (extData.NEG & 0x2) {
18938 src1.negModifier();
18939 }
18940
18941 /**
18942 * input modifiers are supported by FP operations only
18943 */
18944 assert(!(instData.ABS & 0x4));
18945 assert(!(extData.NEG & 0x4));
18946
18947 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18948 if (wf->execMask(lane)) {
18949 sdst.setBit(lane, (!std::isnan(src0[lane])
18950 && !std::isnan(src1[lane])) ? 1 : 0);
18951 }
18952 }
18953
18954 wf->execMask() = sdst.rawData();
18955 sdst.write();
18956 }
18957
18958 Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64(
18959 InFmt_VOP3 *iFmt)
18960 : Inst_VOP3(iFmt, "v_cmpx_u_f64", true)
18961 {
18962 setFlag(ALU);
18963 setFlag(F64);
18964 } // Inst_VOP3__V_CMPX_U_F64
18965
18966 Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64()
18967 {
18968 } // ~Inst_VOP3__V_CMPX_U_F64
18969
18970 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
18971 // encoding.
18972 void
18973 Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
18974 {
18975 Wavefront *wf = gpuDynInst->wavefront();
18976 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18977 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18978 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18979
18980 src0.readSrc();
18981 src1.readSrc();
18982
18983 if (instData.ABS & 0x1) {
18984 src0.absModifier();
18985 }
18986
18987 if (instData.ABS & 0x2) {
18988 src1.absModifier();
18989 }
18990
18991 if (extData.NEG & 0x1) {
18992 src0.negModifier();
18993 }
18994
18995 if (extData.NEG & 0x2) {
18996 src1.negModifier();
18997 }
18998
18999 /**
19000 * input modifiers are supported by FP operations only
19001 */
19002 assert(!(instData.ABS & 0x4));
19003 assert(!(extData.NEG & 0x4));
19004
19005 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19006 if (wf->execMask(lane)) {
19007 sdst.setBit(lane, (std::isnan(src0[lane])
19008 || std::isnan(src1[lane])) ? 1 : 0);
19009 }
19010 }
19011
19012 wf->execMask() = sdst.rawData();
19013 sdst.write();
19014 }
19015
19016 Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64(
19017 InFmt_VOP3 *iFmt)
19018 : Inst_VOP3(iFmt, "v_cmpx_nge_f64", true)
19019 {
19020 setFlag(ALU);
19021 setFlag(F64);
19022 } // Inst_VOP3__V_CMPX_NGE_F64
19023
19024 Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64()
19025 {
19026 } // ~Inst_VOP3__V_CMPX_NGE_F64
19027
19028 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
19029 void
19030 Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
19031 {
19032 Wavefront *wf = gpuDynInst->wavefront();
19033 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19034 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19035 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19036
19037 src0.readSrc();
19038 src1.readSrc();
19039
19040 if (instData.ABS & 0x1) {
19041 src0.absModifier();
19042 }
19043
19044 if (instData.ABS & 0x2) {
19045 src1.absModifier();
19046 }
19047
19048 if (extData.NEG & 0x1) {
19049 src0.negModifier();
19050 }
19051
19052 if (extData.NEG & 0x2) {
19053 src1.negModifier();
19054 }
19055
19056 /**
19057 * input modifiers are supported by FP operations only
19058 */
19059 assert(!(instData.ABS & 0x4));
19060 assert(!(extData.NEG & 0x4));
19061
19062 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19063 if (wf->execMask(lane)) {
19064 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
19065 }
19066 }
19067
19068 wf->execMask() = sdst.rawData();
19069 sdst.write();
19070 }
19071
19072 Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64(
19073 InFmt_VOP3 *iFmt)
19074 : Inst_VOP3(iFmt, "v_cmpx_nlg_f64", true)
19075 {
19076 setFlag(ALU);
19077 setFlag(F64);
19078 } // Inst_VOP3__V_CMPX_NLG_F64
19079
19080 Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64()
19081 {
19082 } // ~Inst_VOP3__V_CMPX_NLG_F64
19083
19084 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
19085 void
19086 Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
19087 {
19088 Wavefront *wf = gpuDynInst->wavefront();
19089 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19090 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19091 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19092
19093 src0.readSrc();
19094 src1.readSrc();
19095
19096 if (instData.ABS & 0x1) {
19097 src0.absModifier();
19098 }
19099
19100 if (instData.ABS & 0x2) {
19101 src1.absModifier();
19102 }
19103
19104 if (extData.NEG & 0x1) {
19105 src0.negModifier();
19106 }
19107
19108 if (extData.NEG & 0x2) {
19109 src1.negModifier();
19110 }
19111
19112 /**
19113 * input modifiers are supported by FP operations only
19114 */
19115 assert(!(instData.ABS & 0x4));
19116 assert(!(extData.NEG & 0x4));
19117
19118 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19119 if (wf->execMask(lane)) {
19120 sdst.setBit(lane, !(src0[lane] < src1[lane]
19121 || src0[lane] > src1[lane]) ? 1 : 0);
19122 }
19123 }
19124
19125 wf->execMask() = sdst.rawData();
19126 sdst.write();
19127 }
19128
19129 Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64(
19130 InFmt_VOP3 *iFmt)
19131 : Inst_VOP3(iFmt, "v_cmpx_ngt_f64", true)
19132 {
19133 setFlag(ALU);
19134 setFlag(F64);
19135 } // Inst_VOP3__V_CMPX_NGT_F64
19136
19137 Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64()
19138 {
19139 } // ~Inst_VOP3__V_CMPX_NGT_F64
19140
19141 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
19142 void
19143 Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
19144 {
19145 Wavefront *wf = gpuDynInst->wavefront();
19146 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19147 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19148 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19149
19150 src0.readSrc();
19151 src1.readSrc();
19152
19153 if (instData.ABS & 0x1) {
19154 src0.absModifier();
19155 }
19156
19157 if (instData.ABS & 0x2) {
19158 src1.absModifier();
19159 }
19160
19161 if (extData.NEG & 0x1) {
19162 src0.negModifier();
19163 }
19164
19165 if (extData.NEG & 0x2) {
19166 src1.negModifier();
19167 }
19168
19169 /**
19170 * input modifiers are supported by FP operations only
19171 */
19172 assert(!(instData.ABS & 0x4));
19173 assert(!(extData.NEG & 0x4));
19174
19175 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19176 if (wf->execMask(lane)) {
19177 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
19178 }
19179 }
19180
19181 wf->execMask() = sdst.rawData();
19182 sdst.write();
19183 }
19184
19185 Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64(
19186 InFmt_VOP3 *iFmt)
19187 : Inst_VOP3(iFmt, "v_cmpx_nle_f64", true)
19188 {
19189 setFlag(ALU);
19190 setFlag(F64);
19191 } // Inst_VOP3__V_CMPX_NLE_F64
19192
19193 Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64()
19194 {
19195 } // ~Inst_VOP3__V_CMPX_NLE_F64
19196
19197 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
19198 void
19199 Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
19200 {
19201 Wavefront *wf = gpuDynInst->wavefront();
19202 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19203 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19204 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19205
19206 src0.readSrc();
19207 src1.readSrc();
19208
19209 if (instData.ABS & 0x1) {
19210 src0.absModifier();
19211 }
19212
19213 if (instData.ABS & 0x2) {
19214 src1.absModifier();
19215 }
19216
19217 if (extData.NEG & 0x1) {
19218 src0.negModifier();
19219 }
19220
19221 if (extData.NEG & 0x2) {
19222 src1.negModifier();
19223 }
19224
19225 /**
19226 * input modifiers are supported by FP operations only
19227 */
19228 assert(!(instData.ABS & 0x4));
19229 assert(!(extData.NEG & 0x4));
19230
19231 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19232 if (wf->execMask(lane)) {
19233 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
19234 }
19235 }
19236
19237 wf->execMask() = sdst.rawData();
19238 sdst.write();
19239 }
19240
19241 Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64(
19242 InFmt_VOP3 *iFmt)
19243 : Inst_VOP3(iFmt, "v_cmpx_neq_f64", true)
19244 {
19245 setFlag(ALU);
19246 setFlag(F64);
19247 } // Inst_VOP3__V_CMPX_NEQ_F64
19248
19249 Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64()
19250 {
19251 } // ~Inst_VOP3__V_CMPX_NEQ_F64
19252
19253 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
19254 void
19255 Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
19256 {
19257 Wavefront *wf = gpuDynInst->wavefront();
19258 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19259 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19260 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19261
19262 src0.readSrc();
19263 src1.readSrc();
19264
19265 if (instData.ABS & 0x1) {
19266 src0.absModifier();
19267 }
19268
19269 if (instData.ABS & 0x2) {
19270 src1.absModifier();
19271 }
19272
19273 if (extData.NEG & 0x1) {
19274 src0.negModifier();
19275 }
19276
19277 if (extData.NEG & 0x2) {
19278 src1.negModifier();
19279 }
19280
19281 /**
19282 * input modifiers are supported by FP operations only
19283 */
19284 assert(!(instData.ABS & 0x4));
19285 assert(!(extData.NEG & 0x4));
19286
19287 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19288 if (wf->execMask(lane)) {
19289 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
19290 }
19291 }
19292
19293 wf->execMask() = sdst.rawData();
19294 sdst.write();
19295 }
19296
19297 Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64(
19298 InFmt_VOP3 *iFmt)
19299 : Inst_VOP3(iFmt, "v_cmpx_nlt_f64", true)
19300 {
19301 setFlag(ALU);
19302 setFlag(F64);
19303 } // Inst_VOP3__V_CMPX_NLT_F64
19304
19305 Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64()
19306 {
19307 } // ~Inst_VOP3__V_CMPX_NLT_F64
19308
19309 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
19310 void
19311 Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
19312 {
19313 Wavefront *wf = gpuDynInst->wavefront();
19314 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19315 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19316 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19317
19318 src0.readSrc();
19319 src1.readSrc();
19320
19321 if (instData.ABS & 0x1) {
19322 src0.absModifier();
19323 }
19324
19325 if (instData.ABS & 0x2) {
19326 src1.absModifier();
19327 }
19328
19329 if (extData.NEG & 0x1) {
19330 src0.negModifier();
19331 }
19332
19333 if (extData.NEG & 0x2) {
19334 src1.negModifier();
19335 }
19336
19337 /**
19338 * input modifiers are supported by FP operations only
19339 */
19340 assert(!(instData.ABS & 0x4));
19341 assert(!(extData.NEG & 0x4));
19342
19343 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19344 if (wf->execMask(lane)) {
19345 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
19346 }
19347 }
19348
19349 wf->execMask() = sdst.rawData();
19350 sdst.write();
19351 }
19352
19353 Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64(
19354 InFmt_VOP3 *iFmt)
19355 : Inst_VOP3(iFmt, "v_cmpx_tru_f64", true)
19356 {
19357 setFlag(ALU);
19358 setFlag(F64);
19359 } // Inst_VOP3__V_CMPX_TRU_F64
19360
19361 Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64()
19362 {
19363 } // ~Inst_VOP3__V_CMPX_TRU_F64
19364
19365 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
19366 void
19367 Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
19368 {
19369 Wavefront *wf = gpuDynInst->wavefront();
19370 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19371
19372 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19373 if (wf->execMask(lane)) {
19374 sdst.setBit(lane, 1);
19375 }
19376 }
19377
19378 wf->execMask() = sdst.rawData();
19379 sdst.write();
19380 }
19381
19382 Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3 *iFmt)
19383 : Inst_VOP3(iFmt, "v_cmp_f_i16", true)
19384 {
19385 setFlag(ALU);
19386 } // Inst_VOP3__V_CMP_F_I16
19387
19388 Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16()
19389 {
19390 } // ~Inst_VOP3__V_CMP_F_I16
19391
19392 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
19393 void
19394 Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
19395 {
19396 Wavefront *wf = gpuDynInst->wavefront();
19397 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19398
19399 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19400 if (wf->execMask(lane)) {
19401 sdst.setBit(lane, 0);
19402 }
19403 }
19404
19405 sdst.write();
19406 }
19407
19408 Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16(
19409 InFmt_VOP3 *iFmt)
19410 : Inst_VOP3(iFmt, "v_cmp_lt_i16", true)
19411 {
19412 setFlag(ALU);
19413 } // Inst_VOP3__V_CMP_LT_I16
19414
19415 Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16()
19416 {
19417 } // ~Inst_VOP3__V_CMP_LT_I16
19418
19419 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
19420 void
19421 Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
19422 {
19423 Wavefront *wf = gpuDynInst->wavefront();
19424 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19425 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19426 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19427
19428 src0.readSrc();
19429 src1.readSrc();
19430
19431 /**
19432 * input modifiers are supported by FP operations only
19433 */
19434 assert(!(instData.ABS & 0x1));
19435 assert(!(instData.ABS & 0x2));
19436 assert(!(instData.ABS & 0x4));
19437 assert(!(extData.NEG & 0x1));
19438 assert(!(extData.NEG & 0x2));
19439 assert(!(extData.NEG & 0x4));
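            // Unlike the FP compares above, the integer forms accept no
            // input modifiers at all, so every ABS and NEG bit is
            // asserted clear rather than only the third-source bit.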
19440
19441 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19442 if (wf->execMask(lane)) {
19443 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
19444 }
19445 }
19446
19447 sdst.write();
19448 }
19449
19450 Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16(
19451 InFmt_VOP3 *iFmt)
19452 : Inst_VOP3(iFmt, "v_cmp_eq_i16", true)
19453 {
19454 setFlag(ALU);
19455 } // Inst_VOP3__V_CMP_EQ_I16
19456
19457 Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16()
19458 {
19459 } // ~Inst_VOP3__V_CMP_EQ_I16
19460
19461 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
19462 void
19463 Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
19464 {
19465 Wavefront *wf = gpuDynInst->wavefront();
19466 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19467 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19468 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19469
19470 src0.readSrc();
19471 src1.readSrc();
19472
19473 /**
19474 * input modifiers are supported by FP operations only
19475 */
19476 assert(!(instData.ABS & 0x1));
19477 assert(!(instData.ABS & 0x2));
19478 assert(!(instData.ABS & 0x4));
19479 assert(!(extData.NEG & 0x1));
19480 assert(!(extData.NEG & 0x2));
19481 assert(!(extData.NEG & 0x4));
19482
19483 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19484 if (wf->execMask(lane)) {
19485 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
19486 }
19487 }
19488
19489 sdst.write();
19490 }
19491
19492 Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16(
19493 InFmt_VOP3 *iFmt)
19494 : Inst_VOP3(iFmt, "v_cmp_le_i16", true)
19495 {
19496 setFlag(ALU);
19497 } // Inst_VOP3__V_CMP_LE_I16
19498
19499 Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16()
19500 {
19501 } // ~Inst_VOP3__V_CMP_LE_I16
19502
19503 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
19504 void
19505 Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
19506 {
19507 Wavefront *wf = gpuDynInst->wavefront();
19508 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19509 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19510 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19511
19512 src0.readSrc();
19513 src1.readSrc();
19514
19515 /**
19516 * input modifiers are supported by FP operations only
19517 */
19518 assert(!(instData.ABS & 0x1));
19519 assert(!(instData.ABS & 0x2));
19520 assert(!(instData.ABS & 0x4));
19521 assert(!(extData.NEG & 0x1));
19522 assert(!(extData.NEG & 0x2));
19523 assert(!(extData.NEG & 0x4));
19524
19525 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19526 if (wf->execMask(lane)) {
19527 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
19528 }
19529 }
19530
19531 sdst.write();
19532 }
19533
19534 Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16(
19535 InFmt_VOP3 *iFmt)
19536 : Inst_VOP3(iFmt, "v_cmp_gt_i16", true)
19537 {
19538 setFlag(ALU);
19539 } // Inst_VOP3__V_CMP_GT_I16
19540
19541 Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16()
19542 {
19543 } // ~Inst_VOP3__V_CMP_GT_I16
19544
19545 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
19546 void
19547 Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
19548 {
19549 Wavefront *wf = gpuDynInst->wavefront();
19550 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19551 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19552 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19553
19554 src0.readSrc();
19555 src1.readSrc();
19556
19557 /**
19558 * input modifiers are supported by FP operations only
19559 */
19560 assert(!(instData.ABS & 0x1));
19561 assert(!(instData.ABS & 0x2));
19562 assert(!(instData.ABS & 0x4));
19563 assert(!(extData.NEG & 0x1));
19564 assert(!(extData.NEG & 0x2));
19565 assert(!(extData.NEG & 0x4));
19566
19567 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19568 if (wf->execMask(lane)) {
19569 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
19570 }
19571 }
19572
19573 sdst.write();
19574 }
19575
19576 Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16(
19577 InFmt_VOP3 *iFmt)
19578 : Inst_VOP3(iFmt, "v_cmp_ne_i16", true)
19579 {
19580 setFlag(ALU);
19581 } // Inst_VOP3__V_CMP_NE_I16
19582
19583 Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16()
19584 {
19585 } // ~Inst_VOP3__V_CMP_NE_I16
19586
19587 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
19588 void
19589 Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
19590 {
19591 Wavefront *wf = gpuDynInst->wavefront();
19592 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19593 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19594 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19595
19596 src0.readSrc();
19597 src1.readSrc();
19598
19599 /**
19600 * input modifiers are supported by FP operations only
19601 */
19602 assert(!(instData.ABS & 0x1));
19603 assert(!(instData.ABS & 0x2));
19604 assert(!(instData.ABS & 0x4));
19605 assert(!(extData.NEG & 0x1));
19606 assert(!(extData.NEG & 0x2));
19607 assert(!(extData.NEG & 0x4));
19608
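            // Integers have no unordered case, so the documented
            // (S0 <> S1) and the != used here are the same predicate.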
19609 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19610 if (wf->execMask(lane)) {
19611 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
19612 }
19613 }
19614
19615 sdst.write();
19616 }
19617
19618 Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16(
19619 InFmt_VOP3 *iFmt)
19620 : Inst_VOP3(iFmt, "v_cmp_ge_i16", true)
19621 {
19622 setFlag(ALU);
19623 } // Inst_VOP3__V_CMP_GE_I16
19624
19625 Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16()
19626 {
19627 } // ~Inst_VOP3__V_CMP_GE_I16
19628
19629 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
19630 void
19631 Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
19632 {
19633 Wavefront *wf = gpuDynInst->wavefront();
19634 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19635 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19636 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19637
19638 src0.readSrc();
19639 src1.readSrc();
19640
19641 /**
19642 * input modifiers are supported by FP operations only
19643 */
19644 assert(!(instData.ABS & 0x1));
19645 assert(!(instData.ABS & 0x2));
19646 assert(!(instData.ABS & 0x4));
19647 assert(!(extData.NEG & 0x1));
19648 assert(!(extData.NEG & 0x2));
19649 assert(!(extData.NEG & 0x4));
19650
19651 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19652 if (wf->execMask(lane)) {
19653 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
19654 }
19655 }
19656
19657 sdst.write();
19658 }
19659
19660 Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3 *iFmt)
19661 : Inst_VOP3(iFmt, "v_cmp_t_i16", true)
19662 {
19663 setFlag(ALU);
19664 } // Inst_VOP3__V_CMP_T_I16
19665
19666 Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16()
19667 {
19668 } // ~Inst_VOP3__V_CMP_T_I16
19669
19670 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
19671 void
19672 Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
19673 {
19674 Wavefront *wf = gpuDynInst->wavefront();
19675 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19676
19677 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19678 if (wf->execMask(lane)) {
19679 sdst.setBit(lane, 1);
19680 }
19681 }
19682
19683 sdst.write();
19684 }
19685
19686 Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3 *iFmt)
19687 : Inst_VOP3(iFmt, "v_cmp_f_u16", true)
19688 {
19689 setFlag(ALU);
19690 } // Inst_VOP3__V_CMP_F_U16
19691
19692 Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16()
19693 {
19694 } // ~Inst_VOP3__V_CMP_F_U16
19695
19696 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
19697 void
19698 Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
19699 {
19700 Wavefront *wf = gpuDynInst->wavefront();
19701 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19702
19703 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19704 if (wf->execMask(lane)) {
19705 sdst.setBit(lane, 0);
19706 }
19707 }
19708
19709 sdst.write();
19710 }
19711
19712 Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16(
19713 InFmt_VOP3 *iFmt)
19714 : Inst_VOP3(iFmt, "v_cmp_lt_u16", true)
19715 {
19716 setFlag(ALU);
19717 } // Inst_VOP3__V_CMP_LT_U16
19718
19719 Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16()
19720 {
19721 } // ~Inst_VOP3__V_CMP_LT_U16
19722
19723 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
19724 void
19725 Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
19726 {
19727 Wavefront *wf = gpuDynInst->wavefront();
19728 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19729 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19730 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19731
19732 src0.readSrc();
19733 src1.readSrc();
19734
19735 /**
19736 * input modifiers are supported by FP operations only
19737 */
19738 assert(!(instData.ABS & 0x1));
19739 assert(!(instData.ABS & 0x2));
19740 assert(!(instData.ABS & 0x4));
19741 assert(!(extData.NEG & 0x1));
19742 assert(!(extData.NEG & 0x2));
19743 assert(!(extData.NEG & 0x4));
19744
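            // This matches v_cmp_lt_i16 except for the operand type:
            // ConstVecOperandU16 makes the < below an unsigned
            // comparison.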
19745 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19746 if (wf->execMask(lane)) {
19747 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
19748 }
19749 }
19750
19751 sdst.write();
19752 }
19753
19754 Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16(
19755 InFmt_VOP3 *iFmt)
19756 : Inst_VOP3(iFmt, "v_cmp_eq_u16", true)
19757 {
19758 setFlag(ALU);
19759 } // Inst_VOP3__V_CMP_EQ_U16
19760
19761 Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16()
19762 {
19763 } // ~Inst_VOP3__V_CMP_EQ_U16
19764
19765 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
19766 void
19767 Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
19768 {
19769 Wavefront *wf = gpuDynInst->wavefront();
19770 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19771 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19772 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19773
19774 src0.readSrc();
19775 src1.readSrc();
19776
19777 /**
19778 * input modifiers are supported by FP operations only
19779 */
19780 assert(!(instData.ABS & 0x1));
19781 assert(!(instData.ABS & 0x2));
19782 assert(!(instData.ABS & 0x4));
19783 assert(!(extData.NEG & 0x1));
19784 assert(!(extData.NEG & 0x2));
19785 assert(!(extData.NEG & 0x4));
19786
19787 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19788 if (wf->execMask(lane)) {
19789 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
19790 }
19791 }
19792
19793 sdst.write();
19794 }
19795
19796 Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16(
19797 InFmt_VOP3 *iFmt)
19798 : Inst_VOP3(iFmt, "v_cmp_le_u16", true)
19799 {
19800 setFlag(ALU);
19801 } // Inst_VOP3__V_CMP_LE_U16
19802
19803 Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16()
19804 {
19805 } // ~Inst_VOP3__V_CMP_LE_U16
19806
19807 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
19808 void
19809 Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
19810 {
19811 Wavefront *wf = gpuDynInst->wavefront();
19812 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19813 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19814 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19815
19816 src0.readSrc();
19817 src1.readSrc();
19818
19819 /**
19820 * input modifiers are supported by FP operations only
19821 */
19822 assert(!(instData.ABS & 0x1));
19823 assert(!(instData.ABS & 0x2));
19824 assert(!(instData.ABS & 0x4));
19825 assert(!(extData.NEG & 0x1));
19826 assert(!(extData.NEG & 0x2));
19827 assert(!(extData.NEG & 0x4));
19828
19829 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19830 if (wf->execMask(lane)) {
19831 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
19832 }
19833 }
19834
19835 sdst.write();
19836 }
19837
19838 Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16(
19839 InFmt_VOP3 *iFmt)
19840 : Inst_VOP3(iFmt, "v_cmp_gt_u16", true)
19841 {
19842 setFlag(ALU);
19843 } // Inst_VOP3__V_CMP_GT_U16
19844
19845 Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16()
19846 {
19847 } // ~Inst_VOP3__V_CMP_GT_U16
19848
19849 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
19850 void
19851 Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
19852 {
19853 Wavefront *wf = gpuDynInst->wavefront();
19854 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19855 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19856 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19857
19858 src0.readSrc();
19859 src1.readSrc();
19860
19861 /**
19862 * input modifiers are supported by FP operations only
19863 */
19864 assert(!(instData.ABS & 0x1));
19865 assert(!(instData.ABS & 0x2));
19866 assert(!(instData.ABS & 0x4));
19867 assert(!(extData.NEG & 0x1));
19868 assert(!(extData.NEG & 0x2));
19869 assert(!(extData.NEG & 0x4));
19870
19871 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19872 if (wf->execMask(lane)) {
19873 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
19874 }
19875 }
19876
19877 sdst.write();
19878 }
19879
19880 Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16(
19881 InFmt_VOP3 *iFmt)
19882 : Inst_VOP3(iFmt, "v_cmp_ne_u16", true)
19883 {
19884 setFlag(ALU);
19885 } // Inst_VOP3__V_CMP_NE_U16
19886
19887 Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16()
19888 {
19889 } // ~Inst_VOP3__V_CMP_NE_U16
19890
19891 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
19892 void
19893 Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
19894 {
19895 Wavefront *wf = gpuDynInst->wavefront();
19896 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19897 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19898 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19899
19900 src0.readSrc();
19901 src1.readSrc();
19902
19903 /**
19904 * input modifiers are supported by FP operations only
19905 */
19906 assert(!(instData.ABS & 0x1));
19907 assert(!(instData.ABS & 0x2));
19908 assert(!(instData.ABS & 0x4));
19909 assert(!(extData.NEG & 0x1));
19910 assert(!(extData.NEG & 0x2));
19911 assert(!(extData.NEG & 0x4));
19912
19913 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19914 if (wf->execMask(lane)) {
19915 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
19916 }
19917 }
19918
19919 sdst.write();
19920 }
19921
19922 Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16(
19923 InFmt_VOP3 *iFmt)
19924 : Inst_VOP3(iFmt, "v_cmp_ge_u16", true)
19925 {
19926 setFlag(ALU);
19927 } // Inst_VOP3__V_CMP_GE_U16
19928
19929 Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16()
19930 {
19931 } // ~Inst_VOP3__V_CMP_GE_U16
19932
19933 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
19934 void
19935 Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
19936 {
19937 Wavefront *wf = gpuDynInst->wavefront();
19938 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19939 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19940 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19941
19942 src0.readSrc();
19943 src1.readSrc();
19944
19945 /**
19946 * input modifiers are supported by FP operations only
19947 */
19948 assert(!(instData.ABS & 0x1));
19949 assert(!(instData.ABS & 0x2));
19950 assert(!(instData.ABS & 0x4));
19951 assert(!(extData.NEG & 0x1));
19952 assert(!(extData.NEG & 0x2));
19953 assert(!(extData.NEG & 0x4));
19954
19955 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19956 if (wf->execMask(lane)) {
19957 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
19958 }
19959 }
19960
19961 sdst.write();
19962 }
19963
19964 Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3 *iFmt)
19965 : Inst_VOP3(iFmt, "v_cmp_t_u16", true)
19966 {
19967 setFlag(ALU);
19968 } // Inst_VOP3__V_CMP_T_U16
19969
19970 Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16()
19971 {
19972 } // ~Inst_VOP3__V_CMP_T_U16
19973
19974 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
19975 void
19976 Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
19977 {
19978 Wavefront *wf = gpuDynInst->wavefront();
19979 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19980
19981 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19982 if (wf->execMask(lane)) {
19983 sdst.setBit(lane, 1);
19984 }
19985 }
19986
19987 sdst.write();
19988 }
19989
19990 Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16(
19991 InFmt_VOP3 *iFmt)
19992 : Inst_VOP3(iFmt, "v_cmpx_f_i16", true)
19993 {
19994 setFlag(ALU);
19995 } // Inst_VOP3__V_CMPX_F_I16
19996
19997 Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16()
19998 {
19999 } // ~Inst_VOP3__V_CMPX_F_I16
20000
20001 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
20002 void
20003 Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
20004 {
20005 Wavefront *wf = gpuDynInst->wavefront();
20006 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20007
20008 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20009 if (wf->execMask(lane)) {
20010 sdst.setBit(lane, 0);
20011 }
20012 }
20013
20014 wf->execMask() = sdst.rawData();
20015 sdst.write();
20016 }
20017
20018 Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16(
20019 InFmt_VOP3 *iFmt)
20020 : Inst_VOP3(iFmt, "v_cmpx_lt_i16", true)
20021 {
20022 setFlag(ALU);
20023 } // Inst_VOP3__V_CMPX_LT_I16
20024
20025 Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16()
20026 {
20027 } // ~Inst_VOP3__V_CMPX_LT_I16
20028
20029 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20030 void
20031 Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
20032 {
20033 Wavefront *wf = gpuDynInst->wavefront();
20034 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20035 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20036 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20037
20038 src0.readSrc();
20039 src1.readSrc();
20040
20041 /**
20042 * input modifiers are supported by FP operations only
20043 */
20044 assert(!(instData.ABS & 0x1));
20045 assert(!(instData.ABS & 0x2));
20046 assert(!(instData.ABS & 0x4));
20047 assert(!(extData.NEG & 0x1));
20048 assert(!(extData.NEG & 0x2));
20049 assert(!(extData.NEG & 0x4));
20050
20051 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20052 if (wf->execMask(lane)) {
20053 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20054 }
20055 }
20056
20057 wf->execMask() = sdst.rawData();
20058 sdst.write();
20059 }
20060
20061 Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16(
20062 InFmt_VOP3 *iFmt)
20063 : Inst_VOP3(iFmt, "v_cmpx_eq_i16", true)
20064 {
20065 setFlag(ALU);
20066 } // Inst_VOP3__V_CMPX_EQ_I16
20067
20068 Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16()
20069 {
20070 } // ~Inst_VOP3__V_CMPX_EQ_I16
20071
20072 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
20073 void
20074 Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
20075 {
20076 Wavefront *wf = gpuDynInst->wavefront();
20077 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20078 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20079 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20080
20081 src0.readSrc();
20082 src1.readSrc();
20083
20084 /**
20085 * input modifiers are supported by FP operations only
20086 */
20087 assert(!(instData.ABS & 0x1));
20088 assert(!(instData.ABS & 0x2));
20089 assert(!(instData.ABS & 0x4));
20090 assert(!(extData.NEG & 0x1));
20091 assert(!(extData.NEG & 0x2));
20092 assert(!(extData.NEG & 0x4));
20093
20094 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20095 if (wf->execMask(lane)) {
20096 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
20097 }
20098 }
20099
20100 wf->execMask() = sdst.rawData();
20101 sdst.write();
20102 }
20103
20104 Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16(
20105 InFmt_VOP3 *iFmt)
20106 : Inst_VOP3(iFmt, "v_cmpx_le_i16", true)
20107 {
20108 setFlag(ALU);
20109 } // Inst_VOP3__V_CMPX_LE_I16
20110
20111 Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16()
20112 {
20113 } // ~Inst_VOP3__V_CMPX_LE_I16
20114
20115 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
20116 void
20117 Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
20118 {
20119 Wavefront *wf = gpuDynInst->wavefront();
20120 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20121 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20122 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20123
20124 src0.readSrc();
20125 src1.readSrc();
20126
20127 /**
20128 * input modifiers are supported by FP operations only
20129 */
20130 assert(!(instData.ABS & 0x1));
20131 assert(!(instData.ABS & 0x2));
20132 assert(!(instData.ABS & 0x4));
20133 assert(!(extData.NEG & 0x1));
20134 assert(!(extData.NEG & 0x2));
20135 assert(!(extData.NEG & 0x4));
20136
20137 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20138 if (wf->execMask(lane)) {
20139 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
20140 }
20141 }
20142
20143 wf->execMask() = sdst.rawData();
20144 sdst.write();
20145 }
20146
20147 Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16(
20148 InFmt_VOP3 *iFmt)
20149 : Inst_VOP3(iFmt, "v_cmpx_gt_i16", true)
20150 {
20151 setFlag(ALU);
20152 } // Inst_VOP3__V_CMPX_GT_I16
20153
20154 Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16()
20155 {
20156 } // ~Inst_VOP3__V_CMPX_GT_I16
20157
20158 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
20159 void
20160 Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
20161 {
20162 Wavefront *wf = gpuDynInst->wavefront();
20163 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20164 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20165 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20166
20167 src0.readSrc();
20168 src1.readSrc();
20169
20170 /**
20171 * input modifiers are supported by FP operations only
20172 */
20173 assert(!(instData.ABS & 0x1));
20174 assert(!(instData.ABS & 0x2));
20175 assert(!(instData.ABS & 0x4));
20176 assert(!(extData.NEG & 0x1));
20177 assert(!(extData.NEG & 0x2));
20178 assert(!(extData.NEG & 0x4));
20179
20180 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20181 if (wf->execMask(lane)) {
20182 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
20183 }
20184 }
20185
20186 wf->execMask() = sdst.rawData();
20187 sdst.write();
20188 }
20189
20190 Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16(
20191 InFmt_VOP3 *iFmt)
20192 : Inst_VOP3(iFmt, "v_cmpx_ne_i16", true)
20193 {
20194 setFlag(ALU);
20195 } // Inst_VOP3__V_CMPX_NE_I16
20196
20197 Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16()
20198 {
20199 } // ~Inst_VOP3__V_CMPX_NE_I16
20200
20201 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
20202 void
20203 Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
20204 {
20205 Wavefront *wf = gpuDynInst->wavefront();
20206 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20207 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20208 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20209
20210 src0.readSrc();
20211 src1.readSrc();
20212
20213 /**
20214 * input modifiers are supported by FP operations only
20215 */
20216 assert(!(instData.ABS & 0x1));
20217 assert(!(instData.ABS & 0x2));
20218 assert(!(instData.ABS & 0x4));
20219 assert(!(extData.NEG & 0x1));
20220 assert(!(extData.NEG & 0x2));
20221 assert(!(extData.NEG & 0x4));
20222
20223 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20224 if (wf->execMask(lane)) {
20225 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
20226 }
20227 }
20228
20229 wf->execMask() = sdst.rawData();
20230 sdst.write();
20231 }
20232
20233 Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16(
20234 InFmt_VOP3 *iFmt)
20235 : Inst_VOP3(iFmt, "v_cmpx_ge_i16", true)
20236 {
20237 setFlag(ALU);
20238 } // Inst_VOP3__V_CMPX_GE_I16
20239
20240 Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16()
20241 {
20242 } // ~Inst_VOP3__V_CMPX_GE_I16
20243
20244 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
20245 void
20246 Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
20247 {
20248 Wavefront *wf = gpuDynInst->wavefront();
20249 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20250 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20251 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20252
20253 src0.readSrc();
20254 src1.readSrc();
20255
20256 /**
20257 * input modifiers are supported by FP operations only
20258 */
20259 assert(!(instData.ABS & 0x1));
20260 assert(!(instData.ABS & 0x2));
20261 assert(!(instData.ABS & 0x4));
20262 assert(!(extData.NEG & 0x1));
20263 assert(!(extData.NEG & 0x2));
20264 assert(!(extData.NEG & 0x4));
20265
20266 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20267 if (wf->execMask(lane)) {
20268 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
20269 }
20270 }
20271
20272 wf->execMask() = sdst.rawData();
20273 sdst.write();
20274 }
20275
20276 Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16(
20277 InFmt_VOP3 *iFmt)
20278 : Inst_VOP3(iFmt, "v_cmpx_t_i16", true)
20279 {
20280 setFlag(ALU);
20281 } // Inst_VOP3__V_CMPX_T_I16
20282
20283 Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16()
20284 {
20285 } // ~Inst_VOP3__V_CMPX_T_I16
20286
20287 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
20288 void
20289 Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
20290 {
20291 Wavefront *wf = gpuDynInst->wavefront();
20292 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20293
20294 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20295 if (wf->execMask(lane)) {
20296 sdst.setBit(lane, 1);
20297 }
20298 }
20299
20300 wf->execMask() = sdst.rawData();
20301 sdst.write();
20302 }
20303
20304 Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16(
20305 InFmt_VOP3 *iFmt)
20306 : Inst_VOP3(iFmt, "v_cmpx_f_u16", true)
20307 {
20308 setFlag(ALU);
20309 } // Inst_VOP3__V_CMPX_F_U16
20310
20311 Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16()
20312 {
20313 } // ~Inst_VOP3__V_CMPX_F_U16
20314
20315 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
20316 void
20317 Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
20318 {
20319 Wavefront *wf = gpuDynInst->wavefront();
20320 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20321
20322 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20323 if (wf->execMask(lane)) {
20324 sdst.setBit(lane, 0);
20325 }
20326 }
20327
20328 wf->execMask() = sdst.rawData();
20329 sdst.write();
20330 }
20331
20332 Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16(
20333 InFmt_VOP3 *iFmt)
20334 : Inst_VOP3(iFmt, "v_cmpx_lt_u16", true)
20335 {
20336 setFlag(ALU);
20337 } // Inst_VOP3__V_CMPX_LT_U16
20338
20339 Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16()
20340 {
20341 } // ~Inst_VOP3__V_CMPX_LT_U16
20342
20343 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20344 void
20345 Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
20346 {
20347 Wavefront *wf = gpuDynInst->wavefront();
20348 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20349 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20350 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20351
20352 src0.readSrc();
20353 src1.readSrc();
20354
20355 /**
20356 * input modifiers are supported by FP operations only
20357 */
20358 assert(!(instData.ABS & 0x1));
20359 assert(!(instData.ABS & 0x2));
20360 assert(!(instData.ABS & 0x4));
20361 assert(!(extData.NEG & 0x1));
20362 assert(!(extData.NEG & 0x2));
20363 assert(!(extData.NEG & 0x4));
20364
20365 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20366 if (wf->execMask(lane)) {
20367 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20368 }
20369 }
20370
20371 wf->execMask() = sdst.rawData();
20372 sdst.write();
20373 }
20374
20375 Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(
20376 InFmt_VOP3 *iFmt)
20377 : Inst_VOP3(iFmt, "v_cmpx_eq_u16", true)
20378 {
20379 setFlag(ALU);
20380 } // Inst_VOP3__V_CMPX_EQ_U16
20381
20382 Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
20383 {
20384 } // ~Inst_VOP3__V_CMPX_EQ_U16
20385
20386 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
20387 void
20388 Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
20389 {
20390 Wavefront *wf = gpuDynInst->wavefront();
20391 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20392 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20393 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20394
20395 src0.readSrc();
20396 src1.readSrc();
20397
20398 /**
20399 * input modifiers are supported by FP operations only
20400 */
20401 assert(!(instData.ABS & 0x1));
20402 assert(!(instData.ABS & 0x2));
20403 assert(!(instData.ABS & 0x4));
20404 assert(!(extData.NEG & 0x1));
20405 assert(!(extData.NEG & 0x2));
20406 assert(!(extData.NEG & 0x4));
20407
20408 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20409 if (wf->execMask(lane)) {
20410 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
20411 }
20412 }
20413
20414 wf->execMask() = sdst.rawData();
20415 sdst.write();
20416 }
20417
20418 Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(
20419 InFmt_VOP3 *iFmt)
20420 : Inst_VOP3(iFmt, "v_cmpx_le_u16", true)
20421 {
20422 setFlag(ALU);
20423 } // Inst_VOP3__V_CMPX_LE_U16
20424
20425 Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
20426 {
20427 } // ~Inst_VOP3__V_CMPX_LE_U16
20428
20429 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
20430 void
20431 Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
20432 {
20433 Wavefront *wf = gpuDynInst->wavefront();
20434 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20435 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20436 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20437
20438 src0.readSrc();
20439 src1.readSrc();
20440
20441 /**
20442 * input modifiers are supported by FP operations only
20443 */
20444 assert(!(instData.ABS & 0x1));
20445 assert(!(instData.ABS & 0x2));
20446 assert(!(instData.ABS & 0x4));
20447 assert(!(extData.NEG & 0x1));
20448 assert(!(extData.NEG & 0x2));
20449 assert(!(extData.NEG & 0x4));
20450
20451 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20452 if (wf->execMask(lane)) {
20453 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
20454 }
20455 }
20456
20457 wf->execMask() = sdst.rawData();
20458 sdst.write();
20459 }
20460
20461 Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(
20462 InFmt_VOP3 *iFmt)
20463 : Inst_VOP3(iFmt, "v_cmpx_gt_u16", true)
20464 {
20465 setFlag(ALU);
20466 } // Inst_VOP3__V_CMPX_GT_U16
20467
20468 Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
20469 {
20470 } // ~Inst_VOP3__V_CMPX_GT_U16
20471
20472 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
20473 void
20474 Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
20475 {
20476 Wavefront *wf = gpuDynInst->wavefront();
20477 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20478 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20479 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20480
20481 src0.readSrc();
20482 src1.readSrc();
20483
20484 /**
20485 * input modifiers are supported by FP operations only
20486 */
20487 assert(!(instData.ABS & 0x1));
20488 assert(!(instData.ABS & 0x2));
20489 assert(!(instData.ABS & 0x4));
20490 assert(!(extData.NEG & 0x1));
20491 assert(!(extData.NEG & 0x2));
20492 assert(!(extData.NEG & 0x4));
20493
20494 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20495 if (wf->execMask(lane)) {
20496 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
20497 }
20498 }
20499
20500 wf->execMask() = sdst.rawData();
20501 sdst.write();
20502 }
20503
20504 Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(
20505 InFmt_VOP3 *iFmt)
20506 : Inst_VOP3(iFmt, "v_cmpx_ne_u16", true)
20507 {
20508 setFlag(ALU);
20509 } // Inst_VOP3__V_CMPX_NE_U16
20510
20511 Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
20512 {
20513 } // ~Inst_VOP3__V_CMPX_NE_U16
20514
20515 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
20516 void
20517 Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
20518 {
20519 Wavefront *wf = gpuDynInst->wavefront();
20520 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20521 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20522 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20523
20524 src0.readSrc();
20525 src1.readSrc();
20526
20527 /**
20528 * input modifiers are supported by FP operations only
20529 */
20530 assert(!(instData.ABS & 0x1));
20531 assert(!(instData.ABS & 0x2));
20532 assert(!(instData.ABS & 0x4));
20533 assert(!(extData.NEG & 0x1));
20534 assert(!(extData.NEG & 0x2));
20535 assert(!(extData.NEG & 0x4));
20536
20537 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20538 if (wf->execMask(lane)) {
20539 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
20540 }
20541 }
20542
20543 wf->execMask() = sdst.rawData();
20544 sdst.write();
20545 }
20546
20547 Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(
20548 InFmt_VOP3 *iFmt)
20549 : Inst_VOP3(iFmt, "v_cmpx_ge_u16", true)
20550 {
20551 setFlag(ALU);
20552 } // Inst_VOP3__V_CMPX_GE_U16
20553
20554 Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
20555 {
20556 } // ~Inst_VOP3__V_CMPX_GE_U16
20557
20558 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
20559 void
20560 Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
20561 {
20562 Wavefront *wf = gpuDynInst->wavefront();
20563 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20564 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20565 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20566
20567 src0.readSrc();
20568 src1.readSrc();
20569
20570 /**
20571 * input modifiers are supported by FP operations only
20572 */
20573 assert(!(instData.ABS & 0x1));
20574 assert(!(instData.ABS & 0x2));
20575 assert(!(instData.ABS & 0x4));
20576 assert(!(extData.NEG & 0x1));
20577 assert(!(extData.NEG & 0x2));
20578 assert(!(extData.NEG & 0x4));
20579
20580 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20581 if (wf->execMask(lane)) {
20582 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
20583 }
20584 }
20585
20586 wf->execMask() = sdst.rawData();
20587 sdst.write();
20588 }
20589
20590 Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16(
20591 InFmt_VOP3 *iFmt)
20592 : Inst_VOP3(iFmt, "v_cmpx_t_u16", true)
20593 {
20594 setFlag(ALU);
20595 } // Inst_VOP3__V_CMPX_T_U16
20596
20597 Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16()
20598 {
20599 } // ~Inst_VOP3__V_CMPX_T_U16
20600
20601 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
20602 void
20603 Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
20604 {
20605 Wavefront *wf = gpuDynInst->wavefront();
20606 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20607
20608 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20609 if (wf->execMask(lane)) {
20610 sdst.setBit(lane, 1);
20611 }
20612 }
20613
20614 wf->execMask() = sdst.rawData();
20615 sdst.write();
20616 }
20617
20618 Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3 *iFmt)
20619 : Inst_VOP3(iFmt, "v_cmp_f_i32", true)
20620 {
20621 setFlag(ALU);
20622 } // Inst_VOP3__V_CMP_F_I32
20623
20624 Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
20625 {
20626 } // ~Inst_VOP3__V_CMP_F_I32
20627
20628 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
20629 void
20630 Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
20631 {
20632 Wavefront *wf = gpuDynInst->wavefront();
20633 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20634
20635 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20636 if (wf->execMask(lane)) {
20637 sdst.setBit(lane, 0);
20638 }
20639 }
20640
20641 wf->execMask() = sdst.rawData();
20642 sdst.write();
20643 }
20644
20645 Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(
20646 InFmt_VOP3 *iFmt)
20647 : Inst_VOP3(iFmt, "v_cmp_lt_i32", true)
20648 {
20649 setFlag(ALU);
20650 } // Inst_VOP3__V_CMP_LT_I32
20651
20652 Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
20653 {
20654 } // ~Inst_VOP3__V_CMP_LT_I32
20655
20656 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20657 void
20658 Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
20659 {
20660 Wavefront *wf = gpuDynInst->wavefront();
20661 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20662 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20663 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20664
20665 src0.readSrc();
20666 src1.readSrc();
20667
20668 /**
20669 * input modifiers are supported by FP operations only
20670 */
20671 assert(!(instData.ABS & 0x1));
20672 assert(!(instData.ABS & 0x2));
20673 assert(!(instData.ABS & 0x4));
20674 assert(!(extData.NEG & 0x1));
20675 assert(!(extData.NEG & 0x2));
20676 assert(!(extData.NEG & 0x4));
20677
20678 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20679 if (wf->execMask(lane)) {
20680 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20681 }
20682 }
20683
20684 sdst.write();
20685 }
20686
20687 Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(
20688 InFmt_VOP3 *iFmt)
20689 : Inst_VOP3(iFmt, "v_cmp_eq_i32", true)
20690 {
20691 setFlag(ALU);
20692 } // Inst_VOP3__V_CMP_EQ_I32
20693
20694 Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
20695 {
20696 } // ~Inst_VOP3__V_CMP_EQ_I32
20697
20698 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
20699 void
20700 Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
20701 {
20702 Wavefront *wf = gpuDynInst->wavefront();
20703 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20704 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20705 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20706
20707 src0.readSrc();
20708 src1.readSrc();
20709
20710 /**
20711 * input modifiers are supported by FP operations only
20712 */
20713 assert(!(instData.ABS & 0x1));
20714 assert(!(instData.ABS & 0x2));
20715 assert(!(instData.ABS & 0x4));
20716 assert(!(extData.NEG & 0x1));
20717 assert(!(extData.NEG & 0x2));
20718 assert(!(extData.NEG & 0x4));
20719
20720 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20721 if (wf->execMask(lane)) {
20722 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
20723 }
20724 }
20725
20726 sdst.write();
20727 }
20728
20729 Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(
20730 InFmt_VOP3 *iFmt)
20731 : Inst_VOP3(iFmt, "v_cmp_le_i32", true)
20732 {
20733 setFlag(ALU);
20734 } // Inst_VOP3__V_CMP_LE_I32
20735
20736 Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
20737 {
20738 } // ~Inst_VOP3__V_CMP_LE_I32
20739
20740 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
20741 void
20742 Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
20743 {
20744 Wavefront *wf = gpuDynInst->wavefront();
20745 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20746 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20747 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20748
20749 src0.readSrc();
20750 src1.readSrc();
20751
20752 /**
20753 * input modifiers are supported by FP operations only
20754 */
20755 assert(!(instData.ABS & 0x1));
20756 assert(!(instData.ABS & 0x2));
20757 assert(!(instData.ABS & 0x4));
20758 assert(!(extData.NEG & 0x1));
20759 assert(!(extData.NEG & 0x2));
20760 assert(!(extData.NEG & 0x4));
20761
20762 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20763 if (wf->execMask(lane)) {
20764 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
20765 }
20766 }
20767
20768 sdst.write();
20769 }
20770
20771 Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(
20772 InFmt_VOP3 *iFmt)
20773 : Inst_VOP3(iFmt, "v_cmp_gt_i32", true)
20774 {
20775 setFlag(ALU);
20776 } // Inst_VOP3__V_CMP_GT_I32
20777
20778 Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
20779 {
20780 } // ~Inst_VOP3__V_CMP_GT_I32
20781
20782 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
20783 void
20784 Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
20785 {
20786 Wavefront *wf = gpuDynInst->wavefront();
20787 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20788 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20789 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20790
20791 src0.readSrc();
20792 src1.readSrc();
20793
20794 /**
20795 * input modifiers are supported by FP operations only
20796 */
20797 assert(!(instData.ABS & 0x1));
20798 assert(!(instData.ABS & 0x2));
20799 assert(!(instData.ABS & 0x4));
20800 assert(!(extData.NEG & 0x1));
20801 assert(!(extData.NEG & 0x2));
20802 assert(!(extData.NEG & 0x4));
20803
20804 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20805 if (wf->execMask(lane)) {
20806 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
20807 }
20808 }
20809
20810 sdst.write();
20811 }
20812
20813 Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(
20814 InFmt_VOP3 *iFmt)
20815 : Inst_VOP3(iFmt, "v_cmp_ne_i32", true)
20816 {
20817 setFlag(ALU);
20818 } // Inst_VOP3__V_CMP_NE_I32
20819
20820 Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
20821 {
20822 } // ~Inst_VOP3__V_CMP_NE_I32
20823
20824 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
20825 void
20826 Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
20827 {
20828 Wavefront *wf = gpuDynInst->wavefront();
20829 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20830 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20831 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20832
20833 src0.readSrc();
20834 src1.readSrc();
20835
20836 /**
20837 * input modifiers are supported by FP operations only
20838 */
20839 assert(!(instData.ABS & 0x1));
20840 assert(!(instData.ABS & 0x2));
20841 assert(!(instData.ABS & 0x4));
20842 assert(!(extData.NEG & 0x1));
20843 assert(!(extData.NEG & 0x2));
20844 assert(!(extData.NEG & 0x4));
20845
20846 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20847 if (wf->execMask(lane)) {
20848 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
20849 }
20850 }
20851
20852 sdst.write();
20853 }
20854
20855 Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(
20856 InFmt_VOP3 *iFmt)
20857 : Inst_VOP3(iFmt, "v_cmp_ge_i32", true)
20858 {
20859 setFlag(ALU);
20860 } // Inst_VOP3__V_CMP_GE_I32
20861
20862 Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
20863 {
20864 } // ~Inst_VOP3__V_CMP_GE_I32
20865
20866 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
20867 void
20868 Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
20869 {
20870 Wavefront *wf = gpuDynInst->wavefront();
20871 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20872 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20873 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20874
20875 src0.readSrc();
20876 src1.readSrc();
20877
20878 /**
20879 * input modifiers are supported by FP operations only
20880 */
20881 assert(!(instData.ABS & 0x1));
20882 assert(!(instData.ABS & 0x2));
20883 assert(!(instData.ABS & 0x4));
20884 assert(!(extData.NEG & 0x1));
20885 assert(!(extData.NEG & 0x2));
20886 assert(!(extData.NEG & 0x4));
20887
20888 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20889 if (wf->execMask(lane)) {
20890 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
20891 }
20892 }
20893
20894 sdst.write();
20895 }
20896
20897 Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3 *iFmt)
20898 : Inst_VOP3(iFmt, "v_cmp_t_i32", true)
20899 {
20900 setFlag(ALU);
20901 } // Inst_VOP3__V_CMP_T_I32
20902
20903 Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
20904 {
20905 } // ~Inst_VOP3__V_CMP_T_I32
20906
20907 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
20908 void
20909 Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
20910 {
20911 Wavefront *wf = gpuDynInst->wavefront();
20912 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20913
20914 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20915 if (wf->execMask(lane)) {
20916 sdst.setBit(lane, 1);
20917 }
20918 }
20919
20920 sdst.write();
20921 }
20922
20923 Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3 *iFmt)
20924 : Inst_VOP3(iFmt, "v_cmp_f_u32", true)
20925 {
20926 setFlag(ALU);
20927 } // Inst_VOP3__V_CMP_F_U32
20928
20929 Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
20930 {
20931 } // ~Inst_VOP3__V_CMP_F_U32
20932
20933 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
20934 void
20935 Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
20936 {
20937 Wavefront *wf = gpuDynInst->wavefront();
20938 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20939
20940 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20941 if (wf->execMask(lane)) {
20942 sdst.setBit(lane, 0);
20943 }
20944 }
20945
20946 sdst.write();
20947 }
20948
20949 Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(
20950 InFmt_VOP3 *iFmt)
20951 : Inst_VOP3(iFmt, "v_cmp_lt_u32", true)
20952 {
20953 setFlag(ALU);
20954 } // Inst_VOP3__V_CMP_LT_U32
20955
20956 Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
20957 {
20958 } // ~Inst_VOP3__V_CMP_LT_U32
20959
20960 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20961 void
20962 Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
20963 {
20964 Wavefront *wf = gpuDynInst->wavefront();
20965 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
20966 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
20967 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20968
20969 src0.readSrc();
20970 src1.readSrc();
20971
20972 /**
20973 * input modifiers are supported by FP operations only
20974 */
20975 assert(!(instData.ABS & 0x1));
20976 assert(!(instData.ABS & 0x2));
20977 assert(!(instData.ABS & 0x4));
20978 assert(!(extData.NEG & 0x1));
20979 assert(!(extData.NEG & 0x2));
20980 assert(!(extData.NEG & 0x4));
20981
20982 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20983 if (wf->execMask(lane)) {
20984 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20985 }
20986 }
20987
20988 sdst.write();
20989 }
20990
20991 Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32(
20992 InFmt_VOP3 *iFmt)
20993 : Inst_VOP3(iFmt, "v_cmp_eq_u32", true)
20994 {
20995 setFlag(ALU);
20996 } // Inst_VOP3__V_CMP_EQ_U32
20997
20998 Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32()
20999 {
21000 } // ~Inst_VOP3__V_CMP_EQ_U32
21001
21002 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21003 void
21004 Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
21005 {
21006 Wavefront *wf = gpuDynInst->wavefront();
21007 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21008 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21009 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21010
21011 src0.readSrc();
21012 src1.readSrc();
21013
21014 /**
21015 * input modifiers are supported by FP operations only
21016 */
21017 assert(!(instData.ABS & 0x1));
21018 assert(!(instData.ABS & 0x2));
21019 assert(!(instData.ABS & 0x4));
21020 assert(!(extData.NEG & 0x1));
21021 assert(!(extData.NEG & 0x2));
21022 assert(!(extData.NEG & 0x4));
21023
21024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21025 if (wf->execMask(lane)) {
21026 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21027 }
21028 }
21029
21030 sdst.write();
21031 }
21032
21033 Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32(
21034 InFmt_VOP3 *iFmt)
21035 : Inst_VOP3(iFmt, "v_cmp_le_u32", true)
21036 {
21037 setFlag(ALU);
21038 } // Inst_VOP3__V_CMP_LE_U32
21039
21040 Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32()
21041 {
21042 } // ~Inst_VOP3__V_CMP_LE_U32
21043
21044 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21045 void
21046 Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
21047 {
21048 Wavefront *wf = gpuDynInst->wavefront();
21049 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21050 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21051 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21052
21053 src0.readSrc();
21054 src1.readSrc();
21055
21056 /**
21057 * input modifiers are supported by FP operations only
21058 */
21059 assert(!(instData.ABS & 0x1));
21060 assert(!(instData.ABS & 0x2));
21061 assert(!(instData.ABS & 0x4));
21062 assert(!(extData.NEG & 0x1));
21063 assert(!(extData.NEG & 0x2));
21064 assert(!(extData.NEG & 0x4));
21065
21066 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21067 if (wf->execMask(lane)) {
21068 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
21069 }
21070 }
21071
21072 sdst.write();
21073 }
21074
21075 Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32(
21076 InFmt_VOP3 *iFmt)
21077 : Inst_VOP3(iFmt, "v_cmp_gt_u32", true)
21078 {
21079 setFlag(ALU);
21080 } // Inst_VOP3__V_CMP_GT_U32
21081
21082 Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32()
21083 {
21084 } // ~Inst_VOP3__V_CMP_GT_U32
21085
21086 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21087 void
21088 Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
21089 {
21090 Wavefront *wf = gpuDynInst->wavefront();
21091 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21092 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21093 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21094
21095 src0.readSrc();
21096 src1.readSrc();
21097
21098 /**
21099 * input modifiers are supported by FP operations only
21100 */
21101 assert(!(instData.ABS & 0x1));
21102 assert(!(instData.ABS & 0x2));
21103 assert(!(instData.ABS & 0x4));
21104 assert(!(extData.NEG & 0x1));
21105 assert(!(extData.NEG & 0x2));
21106 assert(!(extData.NEG & 0x4));
21107
21108 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21109 if (wf->execMask(lane)) {
21110 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
21111 }
21112 }
21113
21114 sdst.write();
21115 }
21116
21117 Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32(
21118 InFmt_VOP3 *iFmt)
21119 : Inst_VOP3(iFmt, "v_cmp_ne_u32", true)
21120 {
21121 setFlag(ALU);
21122 } // Inst_VOP3__V_CMP_NE_U32
21123
21124 Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32()
21125 {
21126 } // ~Inst_VOP3__V_CMP_NE_U32
21127
21128 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21129 void
21130 Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
21131 {
21132 Wavefront *wf = gpuDynInst->wavefront();
21133 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21134 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21135 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21136
21137 src0.readSrc();
21138 src1.readSrc();
21139
21140 /**
21141 * input modifiers are supported by FP operations only
21142 */
21143 assert(!(instData.ABS & 0x1));
21144 assert(!(instData.ABS & 0x2));
21145 assert(!(instData.ABS & 0x4));
21146 assert(!(extData.NEG & 0x1));
21147 assert(!(extData.NEG & 0x2));
21148 assert(!(extData.NEG & 0x4));
21149
21150 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21151 if (wf->execMask(lane)) {
21152 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
21153 }
21154 }
21155
21156 sdst.write();
21157 }
21158
21159 Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32(
21160 InFmt_VOP3 *iFmt)
21161 : Inst_VOP3(iFmt, "v_cmp_ge_u32", true)
21162 {
21163 setFlag(ALU);
21164 } // Inst_VOP3__V_CMP_GE_U32
21165
21166 Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32()
21167 {
21168 } // ~Inst_VOP3__V_CMP_GE_U32
21169
21170 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21171 void
21172 Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
21173 {
21174 Wavefront *wf = gpuDynInst->wavefront();
21175 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21176 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21177 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21178
21179 src0.readSrc();
21180 src1.readSrc();
21181
21182 /**
21183 * input modifiers are supported by FP operations only
21184 */
21185 assert(!(instData.ABS & 0x1));
21186 assert(!(instData.ABS & 0x2));
21187 assert(!(instData.ABS & 0x4));
21188 assert(!(extData.NEG & 0x1));
21189 assert(!(extData.NEG & 0x2));
21190 assert(!(extData.NEG & 0x4));
21191
21192 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21193 if (wf->execMask(lane)) {
21194 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
21195 }
21196 }
21197
21198 sdst.write();
21199 }
21200
21201 Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3 *iFmt)
21202 : Inst_VOP3(iFmt, "v_cmp_t_u32", true)
21203 {
21204 setFlag(ALU);
21205 } // Inst_VOP3__V_CMP_T_U32
21206
21207 Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32()
21208 {
21209 } // ~Inst_VOP3__V_CMP_T_U32
21210
21211 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
21212 void
21213 Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
21214 {
21215 Wavefront *wf = gpuDynInst->wavefront();
21216 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21217
21218 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21219 if (wf->execMask(lane)) {
21220 sdst.setBit(lane, 1);
21221 }
21222 }
21223
21224 sdst.write();
21225 }
21226
21227 Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32(
21228 InFmt_VOP3 *iFmt)
21229 : Inst_VOP3(iFmt, "v_cmpx_f_i32", true)
21230 {
21231 setFlag(ALU);
21232 } // Inst_VOP3__V_CMPX_F_I32
21233
21234 Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32()
21235 {
21236 } // ~Inst_VOP3__V_CMPX_F_I32
21237
21238 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
21239 void
21240 Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
21241 {
21242 Wavefront *wf = gpuDynInst->wavefront();
21243 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21244
21245 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21246 if (wf->execMask(lane)) {
21247 sdst.setBit(lane, 0);
21248 }
21249 }
21250
21251 wf->execMask() = sdst.rawData();
21252 sdst.write();
21253 }
21254
21255 Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32(
21256 InFmt_VOP3 *iFmt)
21257 : Inst_VOP3(iFmt, "v_cmpx_lt_i32", true)
21258 {
21259 setFlag(ALU);
21260 } // Inst_VOP3__V_CMPX_LT_I32
21261
21262 Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32()
21263 {
21264 } // ~Inst_VOP3__V_CMPX_LT_I32
21265
21266 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
21267 void
21268 Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
21269 {
21270 Wavefront *wf = gpuDynInst->wavefront();
21271 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21272 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21273 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21274
21275 src0.readSrc();
21276 src1.readSrc();
21277
21278 /**
21279 * input modifiers are supported by FP operations only
21280 */
21281 assert(!(instData.ABS & 0x1));
21282 assert(!(instData.ABS & 0x2));
21283 assert(!(instData.ABS & 0x4));
21284 assert(!(extData.NEG & 0x1));
21285 assert(!(extData.NEG & 0x2));
21286 assert(!(extData.NEG & 0x4));
21287
21288 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21289 if (wf->execMask(lane)) {
21290 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
21291 }
21292 }
21293
21294 wf->execMask() = sdst.rawData();
21295 sdst.write();
21296 }
21297
21298 Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32(
21299 InFmt_VOP3 *iFmt)
21300 : Inst_VOP3(iFmt, "v_cmpx_eq_i32", true)
21301 {
21302 setFlag(ALU);
21303 } // Inst_VOP3__V_CMPX_EQ_I32
21304
21305 Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32()
21306 {
21307 } // ~Inst_VOP3__V_CMPX_EQ_I32
21308
21309 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21310 void
21311 Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
21312 {
21313 Wavefront *wf = gpuDynInst->wavefront();
21314 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21315 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21316 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21317
21318 src0.readSrc();
21319 src1.readSrc();
21320
21321 /**
21322 * input modifiers are supported by FP operations only
21323 */
21324 assert(!(instData.ABS & 0x1));
21325 assert(!(instData.ABS & 0x2));
21326 assert(!(instData.ABS & 0x4));
21327 assert(!(extData.NEG & 0x1));
21328 assert(!(extData.NEG & 0x2));
21329 assert(!(extData.NEG & 0x4));
21330
21331 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21332 if (wf->execMask(lane)) {
21333 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21334 }
21335 }
21336
21337 wf->execMask() = sdst.rawData();
21338 sdst.write();
21339 }
21340
21341 Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32(
21342 InFmt_VOP3 *iFmt)
21343 : Inst_VOP3(iFmt, "v_cmpx_le_i32", true)
21344 {
21345 setFlag(ALU);
21346 } // Inst_VOP3__V_CMPX_LE_I32
21347
21348 Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32()
21349 {
21350 } // ~Inst_VOP3__V_CMPX_LE_I32
21351
21352 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21353 void
21354 Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
21355 {
21356 Wavefront *wf = gpuDynInst->wavefront();
21357 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21358 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21359 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21360
21361 src0.readSrc();
21362 src1.readSrc();
21363
21364 /**
21365 * input modifiers are supported by FP operations only
21366 */
21367 assert(!(instData.ABS & 0x1));
21368 assert(!(instData.ABS & 0x2));
21369 assert(!(instData.ABS & 0x4));
21370 assert(!(extData.NEG & 0x1));
21371 assert(!(extData.NEG & 0x2));
21372 assert(!(extData.NEG & 0x4));
21373
21374 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21375 if (wf->execMask(lane)) {
21376 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
21377 }
21378 }
21379
21380 wf->execMask() = sdst.rawData();
21381 sdst.write();
21382 }
21383
21384 Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32(
21385 InFmt_VOP3 *iFmt)
21386 : Inst_VOP3(iFmt, "v_cmpx_gt_i32", true)
21387 {
21388 setFlag(ALU);
21389 } // Inst_VOP3__V_CMPX_GT_I32
21390
21391 Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32()
21392 {
21393 } // ~Inst_VOP3__V_CMPX_GT_I32
21394
21395 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21396 void
21397 Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
21398 {
21399 Wavefront *wf = gpuDynInst->wavefront();
21400 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21401 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21402 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21403
21404 src0.readSrc();
21405 src1.readSrc();
21406
21407 /**
21408 * input modifiers are supported by FP operations only
21409 */
21410 assert(!(instData.ABS & 0x1));
21411 assert(!(instData.ABS & 0x2));
21412 assert(!(instData.ABS & 0x4));
21413 assert(!(extData.NEG & 0x1));
21414 assert(!(extData.NEG & 0x2));
21415 assert(!(extData.NEG & 0x4));
21416
21417 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21418 if (wf->execMask(lane)) {
21419 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
21420 }
21421 }
21422
21423 wf->execMask() = sdst.rawData();
21424 sdst.write();
21425 }
21426
21427 Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32(
21428 InFmt_VOP3 *iFmt)
21429 : Inst_VOP3(iFmt, "v_cmpx_ne_i32", true)
21430 {
21431 setFlag(ALU);
21432 } // Inst_VOP3__V_CMPX_NE_I32
21433
21434 Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32()
21435 {
21436 } // ~Inst_VOP3__V_CMPX_NE_I32
21437
21438 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21439 void
21440 Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
21441 {
21442 Wavefront *wf = gpuDynInst->wavefront();
21443 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21444 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21445 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21446
21447 src0.readSrc();
21448 src1.readSrc();
21449
21450 /**
21451 * input modifiers are supported by FP operations only
21452 */
21453 assert(!(instData.ABS & 0x1));
21454 assert(!(instData.ABS & 0x2));
21455 assert(!(instData.ABS & 0x4));
21456 assert(!(extData.NEG & 0x1));
21457 assert(!(extData.NEG & 0x2));
21458 assert(!(extData.NEG & 0x4));
21459
21460 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21461 if (wf->execMask(lane)) {
21462 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
21463 }
21464 }
21465
21466 wf->execMask() = sdst.rawData();
21467 sdst.write();
21468 }
21469
21470 Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32(
21471 InFmt_VOP3 *iFmt)
21472 : Inst_VOP3(iFmt, "v_cmpx_ge_i32", true)
21473 {
21474 setFlag(ALU);
21475 } // Inst_VOP3__V_CMPX_GE_I32
21476
21477 Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32()
21478 {
21479 } // ~Inst_VOP3__V_CMPX_GE_I32
21480
21481 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21482 void
21483 Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
21484 {
21485 Wavefront *wf = gpuDynInst->wavefront();
21486 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21487 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21488 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21489
21490 src0.readSrc();
21491 src1.readSrc();
21492
21493 /**
21494 * input modifiers are supported by FP operations only
21495 */
21496 assert(!(instData.ABS & 0x1));
21497 assert(!(instData.ABS & 0x2));
21498 assert(!(instData.ABS & 0x4));
21499 assert(!(extData.NEG & 0x1));
21500 assert(!(extData.NEG & 0x2));
21501 assert(!(extData.NEG & 0x4));
21502
21503 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21504 if (wf->execMask(lane)) {
21505 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
21506 }
21507 }
21508
21509 wf->execMask() = sdst.rawData();
21510 sdst.write();
21511 }
21512
21513 Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32(
21514 InFmt_VOP3 *iFmt)
21515 : Inst_VOP3(iFmt, "v_cmpx_t_i32", true)
21516 {
21517 setFlag(ALU);
21518 } // Inst_VOP3__V_CMPX_T_I32
21519
21520 Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32()
21521 {
21522 } // ~Inst_VOP3__V_CMPX_T_I32
21523
21524 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
21525 void
21526 Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
21527 {
21528 Wavefront *wf = gpuDynInst->wavefront();
21529 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21530
21531 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21532 if (wf->execMask(lane)) {
21533 sdst.setBit(lane, 1);
21534 }
21535 }
21536
21537 wf->execMask() = sdst.rawData();
21538 sdst.write();
21539 }
21540
21541 Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32(
21542 InFmt_VOP3 *iFmt)
21543 : Inst_VOP3(iFmt, "v_cmpx_f_u32", true)
21544 {
21545 setFlag(ALU);
21546 } // Inst_VOP3__V_CMPX_F_U32
21547
21548 Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32()
21549 {
21550 } // ~Inst_VOP3__V_CMPX_F_U32
21551
21552 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
21553 void
21554 Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
21555 {
21556 Wavefront *wf = gpuDynInst->wavefront();
21557 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21558
21559 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21560 if (wf->execMask(lane)) {
21561 sdst.setBit(lane, 0);
21562 }
21563 }
21564
21565 wf->execMask() = sdst.rawData();
21566 sdst.write();
21567 }
21568
21569 Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32(
21570 InFmt_VOP3 *iFmt)
21571 : Inst_VOP3(iFmt, "v_cmpx_lt_u32", true)
21572 {
21573 setFlag(ALU);
21574 } // Inst_VOP3__V_CMPX_LT_U32
21575
21576 Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32()
21577 {
21578 } // ~Inst_VOP3__V_CMPX_LT_U32
21579
21580 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
21581 void
21582 Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
21583 {
21584 Wavefront *wf = gpuDynInst->wavefront();
21585 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21586 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21587 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21588
21589 src0.readSrc();
21590 src1.readSrc();
21591
21592 /**
21593 * input modifiers are supported by FP operations only
21594 */
21595 assert(!(instData.ABS & 0x1));
21596 assert(!(instData.ABS & 0x2));
21597 assert(!(instData.ABS & 0x4));
21598 assert(!(extData.NEG & 0x1));
21599 assert(!(extData.NEG & 0x2));
21600 assert(!(extData.NEG & 0x4));
21601
21602 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21603 if (wf->execMask(lane)) {
21604 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
21605 }
21606 }
21607
21608 wf->execMask() = sdst.rawData();
21609 sdst.write();
21610 }
21611
21612 Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32(
21613 InFmt_VOP3 *iFmt)
21614 : Inst_VOP3(iFmt, "v_cmpx_eq_u32", true)
21615 {
21616 setFlag(ALU);
21617 } // Inst_VOP3__V_CMPX_EQ_U32
21618
21619 Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32()
21620 {
21621 } // ~Inst_VOP3__V_CMPX_EQ_U32
21622
21623 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21624 void
21625 Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
21626 {
21627 Wavefront *wf = gpuDynInst->wavefront();
21628 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21629 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21630 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21631
21632 src0.readSrc();
21633 src1.readSrc();
21634
21635 /**
21636 * input modifiers are supported by FP operations only
21637 */
21638 assert(!(instData.ABS & 0x1));
21639 assert(!(instData.ABS & 0x2));
21640 assert(!(instData.ABS & 0x4));
21641 assert(!(extData.NEG & 0x1));
21642 assert(!(extData.NEG & 0x2));
21643 assert(!(extData.NEG & 0x4));
21644
21645 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21646 if (wf->execMask(lane)) {
21647 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21648 }
21649 }
21650
21651 wf->execMask() = sdst.rawData();
21652 sdst.write();
21653 }
21654
21655 Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32(
21656 InFmt_VOP3 *iFmt)
21657 : Inst_VOP3(iFmt, "v_cmpx_le_u32", true)
21658 {
21659 setFlag(ALU);
21660 } // Inst_VOP3__V_CMPX_LE_U32
21661
21662 Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32()
21663 {
21664 } // ~Inst_VOP3__V_CMPX_LE_U32
21665
21666 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21667 void
21668 Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
21669 {
21670 Wavefront *wf = gpuDynInst->wavefront();
21671 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21672 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21673 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21674
21675 src0.readSrc();
21676 src1.readSrc();
21677
21678 /**
21679 * input modifiers are supported by FP operations only
21680 */
21681 assert(!(instData.ABS & 0x1));
21682 assert(!(instData.ABS & 0x2));
21683 assert(!(instData.ABS & 0x4));
21684 assert(!(extData.NEG & 0x1));
21685 assert(!(extData.NEG & 0x2));
21686 assert(!(extData.NEG & 0x4));
21687
21688 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21689 if (wf->execMask(lane)) {
21690 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
21691 }
21692 }
21693
21694 wf->execMask() = sdst.rawData();
21695 sdst.write();
21696 }
21697
21698 Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32(
21699 InFmt_VOP3 *iFmt)
21700 : Inst_VOP3(iFmt, "v_cmpx_gt_u32", true)
21701 {
21702 setFlag(ALU);
21703 } // Inst_VOP3__V_CMPX_GT_U32
21704
21705 Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32()
21706 {
21707 } // ~Inst_VOP3__V_CMPX_GT_U32
21708
21709 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21710 void
21711 Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
21712 {
21713 Wavefront *wf = gpuDynInst->wavefront();
21714 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21715 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21716 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21717
21718 src0.readSrc();
21719 src1.readSrc();
21720
21721 /**
21722 * input modifiers are supported by FP operations only
21723 */
21724 assert(!(instData.ABS & 0x1));
21725 assert(!(instData.ABS & 0x2));
21726 assert(!(instData.ABS & 0x4));
21727 assert(!(extData.NEG & 0x1));
21728 assert(!(extData.NEG & 0x2));
21729 assert(!(extData.NEG & 0x4));
21730
21731 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21732 if (wf->execMask(lane)) {
21733 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
21734 }
21735 }
21736
21737 wf->execMask() = sdst.rawData();
21738 sdst.write();
21739 }
21740
21741 Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32(
21742 InFmt_VOP3 *iFmt)
21743 : Inst_VOP3(iFmt, "v_cmpx_ne_u32", true)
21744 {
21745 setFlag(ALU);
21746 } // Inst_VOP3__V_CMPX_NE_U32
21747
21748 Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32()
21749 {
21750 } // ~Inst_VOP3__V_CMPX_NE_U32
21751
21752 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21753 void
21754 Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
21755 {
21756 Wavefront *wf = gpuDynInst->wavefront();
21757 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21758 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21759 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21760
21761 src0.readSrc();
21762 src1.readSrc();
21763
21764 /**
21765 * input modifiers are supported by FP operations only
21766 */
21767 assert(!(instData.ABS & 0x1));
21768 assert(!(instData.ABS & 0x2));
21769 assert(!(instData.ABS & 0x4));
21770 assert(!(extData.NEG & 0x1));
21771 assert(!(extData.NEG & 0x2));
21772 assert(!(extData.NEG & 0x4));
21773
21774 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21775 if (wf->execMask(lane)) {
21776 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
21777 }
21778 }
21779
21780 wf->execMask() = sdst.rawData();
21781 sdst.write();
21782 }
21783
21784 Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32(
21785 InFmt_VOP3 *iFmt)
21786 : Inst_VOP3(iFmt, "v_cmpx_ge_u32", true)
21787 {
21788 setFlag(ALU);
21789 } // Inst_VOP3__V_CMPX_GE_U32
21790
21791 Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32()
21792 {
21793 } // ~Inst_VOP3__V_CMPX_GE_U32
21794
21795 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21796 void
21797 Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
21798 {
21799 Wavefront *wf = gpuDynInst->wavefront();
21800 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21801 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21802 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21803
21804 src0.readSrc();
21805 src1.readSrc();
21806
21807 /**
21808 * input modifiers are supported by FP operations only
21809 */
21810 assert(!(instData.ABS & 0x1));
21811 assert(!(instData.ABS & 0x2));
21812 assert(!(instData.ABS & 0x4));
21813 assert(!(extData.NEG & 0x1));
21814 assert(!(extData.NEG & 0x2));
21815 assert(!(extData.NEG & 0x4));
21816
21817 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21818 if (wf->execMask(lane)) {
21819 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
21820 }
21821 }
21822
21823 wf->execMask() = sdst.rawData();
21824 sdst.write();
21825 }
21826
21827 Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32(
21828 InFmt_VOP3 *iFmt)
21829 : Inst_VOP3(iFmt, "v_cmpx_t_u32", true)
21830 {
21831 setFlag(ALU);
21832 } // Inst_VOP3__V_CMPX_T_U32
21833
21834 Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32()
21835 {
21836 } // ~Inst_VOP3__V_CMPX_T_U32
21837
21838 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
21839 void
21840 Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
21841 {
21842 Wavefront *wf = gpuDynInst->wavefront();
21843 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21844
21845 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21846 if (wf->execMask(lane)) {
21847 sdst.setBit(lane, 1);
21848 }
21849 }
21850
21851 wf->execMask() = sdst.rawData();
21852 sdst.write();
21853 }
21854
21855 Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3 *iFmt)
21856 : Inst_VOP3(iFmt, "v_cmp_f_i64", true)
21857 {
21858 setFlag(ALU);
21859 } // Inst_VOP3__V_CMP_F_I64
21860
21861 Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64()
21862 {
21863 } // ~Inst_VOP3__V_CMP_F_I64
21864
21865 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
21866 void
21867 Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
21868 {
21869 Wavefront *wf = gpuDynInst->wavefront();
21870 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21871
21872 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21873 if (wf->execMask(lane)) {
21874 sdst.setBit(lane, 0);
21875 }
21876 }
21877
21878 sdst.write();
21879 }
21880
21881 Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64(
21882 InFmt_VOP3 *iFmt)
21883 : Inst_VOP3(iFmt, "v_cmp_lt_i64", true)
21884 {
21885 setFlag(ALU);
21886 } // Inst_VOP3__V_CMP_LT_I64
21887
21888 Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64()
21889 {
21890 } // ~Inst_VOP3__V_CMP_LT_I64
21891
21892 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
21893 void
21894 Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
21895 {
21896 Wavefront *wf = gpuDynInst->wavefront();
21897 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
21898 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
21899 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21900
21901 src0.readSrc();
21902 src1.readSrc();
21903
21904 /**
21905 * input modifiers are supported by FP operations only
21906 */
21907 assert(!(instData.ABS & 0x1));
21908 assert(!(instData.ABS & 0x2));
21909 assert(!(instData.ABS & 0x4));
21910 assert(!(extData.NEG & 0x1));
21911 assert(!(extData.NEG & 0x2));
21912 assert(!(extData.NEG & 0x4));
21913
21914 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21915 if (wf->execMask(lane)) {
21916 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
21917 }
21918 }
21919
21920 sdst.write();
21921 }
21922
21923 Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64(
21924 InFmt_VOP3 *iFmt)
21925 : Inst_VOP3(iFmt, "v_cmp_eq_i64", true)
21926 {
21927 setFlag(ALU);
21928 } // Inst_VOP3__V_CMP_EQ_I64
21929
21930 Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64()
21931 {
21932 } // ~Inst_VOP3__V_CMP_EQ_I64
21933
21934 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21935 void
21936 Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
21937 {
21938 Wavefront *wf = gpuDynInst->wavefront();
21939 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
21940 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
21941 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21942
21943 src0.readSrc();
21944 src1.readSrc();
21945
21946 /**
21947 * input modifiers are supported by FP operations only
21948 */
21949 assert(!(instData.ABS & 0x1));
21950 assert(!(instData.ABS & 0x2));
21951 assert(!(instData.ABS & 0x4));
21952 assert(!(extData.NEG & 0x1));
21953 assert(!(extData.NEG & 0x2));
21954 assert(!(extData.NEG & 0x4));
21955
21956 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21957 if (wf->execMask(lane)) {
21958 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21959 }
21960 }
21961
21962 sdst.write();
21963 }
21964
21965 Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64(
21966 InFmt_VOP3 *iFmt)
21967 : Inst_VOP3(iFmt, "v_cmp_le_i64", true)
21968 {
21969 setFlag(ALU);
21970 } // Inst_VOP3__V_CMP_LE_I64
21971
21972 Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64()
21973 {
21974 } // ~Inst_VOP3__V_CMP_LE_I64
21975
21976 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21977 void
21978 Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
21979 {
21980 Wavefront *wf = gpuDynInst->wavefront();
21981 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
21982 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
21983 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21984
21985 src0.readSrc();
21986 src1.readSrc();
21987
21988 /**
21989 * input modifiers are supported by FP operations only
21990 */
21991 assert(!(instData.ABS & 0x1));
21992 assert(!(instData.ABS & 0x2));
21993 assert(!(instData.ABS & 0x4));
21994 assert(!(extData.NEG & 0x1));
21995 assert(!(extData.NEG & 0x2));
21996 assert(!(extData.NEG & 0x4));
21997
21998 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21999 if (wf->execMask(lane)) {
22000 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22001 }
22002 }
22003
22004 sdst.write();
22005 }
22006
22007 Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64(
22008 InFmt_VOP3 *iFmt)
22009 : Inst_VOP3(iFmt, "v_cmp_gt_i64", true)
22010 {
22011 setFlag(ALU);
22012 } // Inst_VOP3__V_CMP_GT_I64
22013
22014 Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64()
22015 {
22016 } // ~Inst_VOP3__V_CMP_GT_I64
22017
22018 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22019 void
22020 Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
22021 {
22022 Wavefront *wf = gpuDynInst->wavefront();
22023 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22024 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22025 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22026
22027 src0.readSrc();
22028 src1.readSrc();
22029
22030 /**
22031 * input modifiers are supported by FP operations only
22032 */
22033 assert(!(instData.ABS & 0x1));
22034 assert(!(instData.ABS & 0x2));
22035 assert(!(instData.ABS & 0x4));
22036 assert(!(extData.NEG & 0x1));
22037 assert(!(extData.NEG & 0x2));
22038 assert(!(extData.NEG & 0x4));
22039
22040 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22041 if (wf->execMask(lane)) {
22042 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22043 }
22044 }
22045
22046 sdst.write();
22047 }
22048
22049 Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64(
22050 InFmt_VOP3 *iFmt)
22051 : Inst_VOP3(iFmt, "v_cmp_ne_i64", true)
22052 {
22053 setFlag(ALU);
22054 } // Inst_VOP3__V_CMP_NE_I64
22055
22056 Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64()
22057 {
22058 } // ~Inst_VOP3__V_CMP_NE_I64
22059
22060 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22061 void
22062 Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
22063 {
22064 Wavefront *wf = gpuDynInst->wavefront();
22065 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22066 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22067 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22068
22069 src0.readSrc();
22070 src1.readSrc();
22071
22072 /**
22073 * input modifiers are supported by FP operations only
22074 */
22075 assert(!(instData.ABS & 0x1));
22076 assert(!(instData.ABS & 0x2));
22077 assert(!(instData.ABS & 0x4));
22078 assert(!(extData.NEG & 0x1));
22079 assert(!(extData.NEG & 0x2));
22080 assert(!(extData.NEG & 0x4));
22081
22082 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22083 if (wf->execMask(lane)) {
22084 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
22085 }
22086 }
22087
22088 sdst.write();
22089 }
22090
22091 Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64(
22092 InFmt_VOP3 *iFmt)
22093 : Inst_VOP3(iFmt, "v_cmp_ge_i64", true)
22094 {
22095 setFlag(ALU);
22096 } // Inst_VOP3__V_CMP_GE_I64
22097
22098 Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64()
22099 {
22100 } // ~Inst_VOP3__V_CMP_GE_I64
22101
22102 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
22103 void
22104 Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
22105 {
22106 Wavefront *wf = gpuDynInst->wavefront();
22107 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22108 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22109 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22110
22111 src0.readSrc();
22112 src1.readSrc();
22113
22114 /**
22115 * input modifiers are supported by FP operations only
22116 */
22117 assert(!(instData.ABS & 0x1));
22118 assert(!(instData.ABS & 0x2));
22119 assert(!(instData.ABS & 0x4));
22120 assert(!(extData.NEG & 0x1));
22121 assert(!(extData.NEG & 0x2));
22122 assert(!(extData.NEG & 0x4));
22123
22124 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22125 if (wf->execMask(lane)) {
22126 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
22127 }
22128 }
22129
22130 sdst.write();
22131 }
22132
22133 Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3 *iFmt)
22134 : Inst_VOP3(iFmt, "v_cmp_t_i64", true)
22135 {
22136 setFlag(ALU);
22137 } // Inst_VOP3__V_CMP_T_I64
22138
22139 Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64()
22140 {
22141 } // ~Inst_VOP3__V_CMP_T_I64
22142
22143 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
22144 void
22145 Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
22146 {
22147 Wavefront *wf = gpuDynInst->wavefront();
22148 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22149
22150 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22151 if (wf->execMask(lane)) {
22152 sdst.setBit(lane, 1);
22153 }
22154 }
22155
22156 sdst.write();
22157 }
22158
22159 Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3 *iFmt)
22160 : Inst_VOP3(iFmt, "v_cmp_f_u64", true)
22161 {
22162 setFlag(ALU);
22163 } // Inst_VOP3__V_CMP_F_U64
22164
22165 Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64()
22166 {
22167 } // ~Inst_VOP3__V_CMP_F_U64
22168
22169 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
22170 void
22171 Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
22172 {
22173 Wavefront *wf = gpuDynInst->wavefront();
22174 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22175
22176 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22177 if (wf->execMask(lane)) {
22178 sdst.setBit(lane, 0);
22179 }
22180 }
22181
22182 sdst.write();
22183 }
22184
22185 Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64(
22186 InFmt_VOP3 *iFmt)
22187 : Inst_VOP3(iFmt, "v_cmp_lt_u64", true)
22188 {
22189 setFlag(ALU);
22190 } // Inst_VOP3__V_CMP_LT_U64
22191
22192 Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64()
22193 {
22194 } // ~Inst_VOP3__V_CMP_LT_U64
22195
22196 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
22197 void
22198 Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
22199 {
22200 Wavefront *wf = gpuDynInst->wavefront();
22201 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22202 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22203 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22204
22205 src0.readSrc();
22206 src1.readSrc();
22207
22208 /**
22209 * input modifiers are supported by FP operations only
22210 */
22211 assert(!(instData.ABS & 0x1));
22212 assert(!(instData.ABS & 0x2));
22213 assert(!(instData.ABS & 0x4));
22214 assert(!(extData.NEG & 0x1));
22215 assert(!(extData.NEG & 0x2));
22216 assert(!(extData.NEG & 0x4));
22217
22218 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22219 if (wf->execMask(lane)) {
22220 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
22221 }
22222 }
22223
22224 sdst.write();
22225 }
22226
22227 Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64(
22228 InFmt_VOP3 *iFmt)
22229 : Inst_VOP3(iFmt, "v_cmp_eq_u64", true)
22230 {
22231 setFlag(ALU);
22232 } // Inst_VOP3__V_CMP_EQ_U64
22233
22234 Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64()
22235 {
22236 } // ~Inst_VOP3__V_CMP_EQ_U64
22237
22238 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
22239 void
22240 Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
22241 {
22242 Wavefront *wf = gpuDynInst->wavefront();
22243 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22244 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22245 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22246
22247 src0.readSrc();
22248 src1.readSrc();
22249
22250 /**
22251 * input modifiers are supported by FP operations only
22252 */
22253 assert(!(instData.ABS & 0x1));
22254 assert(!(instData.ABS & 0x2));
22255 assert(!(instData.ABS & 0x4));
22256 assert(!(extData.NEG & 0x1));
22257 assert(!(extData.NEG & 0x2));
22258 assert(!(extData.NEG & 0x4));
22259
22260 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22261 if (wf->execMask(lane)) {
22262 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
22263 }
22264 }
22265
22266 sdst.write();
22267 }
22268
22269 Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64(
22270 InFmt_VOP3 *iFmt)
22271 : Inst_VOP3(iFmt, "v_cmp_le_u64", true)
22272 {
22273 setFlag(ALU);
22274 } // Inst_VOP3__V_CMP_LE_U64
22275
22276 Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64()
22277 {
22278 } // ~Inst_VOP3__V_CMP_LE_U64
22279
22280 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
22281 void
22282 Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
22283 {
22284 Wavefront *wf = gpuDynInst->wavefront();
22285 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22286 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22287 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22288
22289 src0.readSrc();
22290 src1.readSrc();
22291
22292 /**
22293 * input modifiers are supported by FP operations only
22294 */
22295 assert(!(instData.ABS & 0x1));
22296 assert(!(instData.ABS & 0x2));
22297 assert(!(instData.ABS & 0x4));
22298 assert(!(extData.NEG & 0x1));
22299 assert(!(extData.NEG & 0x2));
22300 assert(!(extData.NEG & 0x4));
22301
22302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22303 if (wf->execMask(lane)) {
22304 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22305 }
22306 }
22307
22308 sdst.write();
22309 }
22310
22311 Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64(
22312 InFmt_VOP3 *iFmt)
22313 : Inst_VOP3(iFmt, "v_cmp_gt_u64", true)
22314 {
22315 setFlag(ALU);
22316 } // Inst_VOP3__V_CMP_GT_U64
22317
22318 Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64()
22319 {
22320 } // ~Inst_VOP3__V_CMP_GT_U64
22321
22322 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22323 void
22324 Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
22325 {
22326 Wavefront *wf = gpuDynInst->wavefront();
22327 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22328 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22329 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22330
22331 src0.readSrc();
22332 src1.readSrc();
22333
22334 /**
22335 * input modifiers are supported by FP operations only
22336 */
22337 assert(!(instData.ABS & 0x1));
22338 assert(!(instData.ABS & 0x2));
22339 assert(!(instData.ABS & 0x4));
22340 assert(!(extData.NEG & 0x1));
22341 assert(!(extData.NEG & 0x2));
22342 assert(!(extData.NEG & 0x4));
22343
22344 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22345 if (wf->execMask(lane)) {
22346 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22347 }
22348 }
22349
22350 sdst.write();
22351 }
22352
22353 Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64(
22354 InFmt_VOP3 *iFmt)
22355 : Inst_VOP3(iFmt, "v_cmp_ne_u64", true)
22356 {
22357 setFlag(ALU);
22358 } // Inst_VOP3__V_CMP_NE_U64
22359
22360 Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64()
22361 {
22362 } // ~Inst_VOP3__V_CMP_NE_U64
22363
22364 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22365 void
22366 Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
22367 {
22368 Wavefront *wf = gpuDynInst->wavefront();
22369 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22370 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22371 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22372
22373 src0.readSrc();
22374 src1.readSrc();
22375
22376 /**
22377 * input modifiers are supported by FP operations only
22378 */
22379 assert(!(instData.ABS & 0x1));
22380 assert(!(instData.ABS & 0x2));
22381 assert(!(instData.ABS & 0x4));
22382 assert(!(extData.NEG & 0x1));
22383 assert(!(extData.NEG & 0x2));
22384 assert(!(extData.NEG & 0x4));
22385
22386 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22387 if (wf->execMask(lane)) {
22388 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
22389 }
22390 }
22391
22392 sdst.write();
22393 }
22394
22395 Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64(
22396 InFmt_VOP3 *iFmt)
22397 : Inst_VOP3(iFmt, "v_cmp_ge_u64", true)
22398 {
22399 setFlag(ALU);
22400 } // Inst_VOP3__V_CMP_GE_U64
22401
22402 Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64()
22403 {
22404 } // ~Inst_VOP3__V_CMP_GE_U64
22405
22406 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
22407 void
22408 Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
22409 {
22410 Wavefront *wf = gpuDynInst->wavefront();
22411 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22412 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22413 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22414
22415 src0.readSrc();
22416 src1.readSrc();
22417
22418 /**
22419 * input modifiers are supported by FP operations only
22420 */
22421 assert(!(instData.ABS & 0x1));
22422 assert(!(instData.ABS & 0x2));
22423 assert(!(instData.ABS & 0x4));
22424 assert(!(extData.NEG & 0x1));
22425 assert(!(extData.NEG & 0x2));
22426 assert(!(extData.NEG & 0x4));
22427
22428 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22429 if (wf->execMask(lane)) {
22430 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
22431 }
22432 }
22433
22434 sdst.write();
22435 }
22436
22437 Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3 *iFmt)
22438 : Inst_VOP3(iFmt, "v_cmp_t_u64", true)
22439 {
22440 setFlag(ALU);
22441 } // Inst_VOP3__V_CMP_T_U64
22442
22443 Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64()
22444 {
22445 } // ~Inst_VOP3__V_CMP_T_U64
22446
22447 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
22448 void
22449 Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
22450 {
22451 Wavefront *wf = gpuDynInst->wavefront();
22452 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22453
22454 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22455 if (wf->execMask(lane)) {
22456 sdst.setBit(lane, 1);
22457 }
22458 }
22459
22460 sdst.write();
22461 }
22462
22463 Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64(
22464 InFmt_VOP3 *iFmt)
22465 : Inst_VOP3(iFmt, "v_cmpx_f_i64", true)
22466 {
22467 setFlag(ALU);
22468 } // Inst_VOP3__V_CMPX_F_I64
22469
22470 Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64()
22471 {
22472 } // ~Inst_VOP3__V_CMPX_F_I64
22473
22474 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
22475 void
22476 Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
22477 {
22478 Wavefront *wf = gpuDynInst->wavefront();
22479 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22480
22481 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22482 if (wf->execMask(lane)) {
22483 sdst.setBit(lane, 0);
22484 }
22485 }
22486
22487 wf->execMask() = sdst.rawData();
22488 sdst.write();
22489 }
22490
22491 Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64(
22492 InFmt_VOP3 *iFmt)
22493 : Inst_VOP3(iFmt, "v_cmpx_lt_i64", true)
22494 {
22495 setFlag(ALU);
22496 } // Inst_VOP3__V_CMPX_LT_I64
22497
22498 Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64()
22499 {
22500 } // ~Inst_VOP3__V_CMPX_LT_I64
22501
22502 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
22503 void
22504 Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
22505 {
22506 Wavefront *wf = gpuDynInst->wavefront();
22507 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22508 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22509 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22510
22511 src0.readSrc();
22512 src1.readSrc();
22513
22514 /**
22515 * input modifiers are supported by FP operations only
22516 */
22517 assert(!(instData.ABS & 0x1));
22518 assert(!(instData.ABS & 0x2));
22519 assert(!(instData.ABS & 0x4));
22520 assert(!(extData.NEG & 0x1));
22521 assert(!(extData.NEG & 0x2));
22522 assert(!(extData.NEG & 0x4));
22523
22524 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22525 if (wf->execMask(lane)) {
22526 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
22527 }
22528 }
22529
22530 wf->execMask() = sdst.rawData();
22531 sdst.write();
22532 }
22533
22534 Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64(
22535 InFmt_VOP3 *iFmt)
22536 : Inst_VOP3(iFmt, "v_cmpx_eq_i64", true)
22537 {
22538 setFlag(ALU);
22539 } // Inst_VOP3__V_CMPX_EQ_I64
22540
22541 Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64()
22542 {
22543 } // ~Inst_VOP3__V_CMPX_EQ_I64
22544
22545 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
22546 void
22547 Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
22548 {
22549 Wavefront *wf = gpuDynInst->wavefront();
22550 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22551 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22552 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22553
22554 src0.readSrc();
22555 src1.readSrc();
22556
22557 /**
22558 * input modifiers are supported by FP operations only
22559 */
22560 assert(!(instData.ABS & 0x1));
22561 assert(!(instData.ABS & 0x2));
22562 assert(!(instData.ABS & 0x4));
22563 assert(!(extData.NEG & 0x1));
22564 assert(!(extData.NEG & 0x2));
22565 assert(!(extData.NEG & 0x4));
22566
22567 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22568 if (wf->execMask(lane)) {
22569 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
22570 }
22571 }
22572
22573 wf->execMask() = sdst.rawData();
22574 sdst.write();
22575 }
22576
22577 Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64(
22578 InFmt_VOP3 *iFmt)
22579 : Inst_VOP3(iFmt, "v_cmpx_le_i64", true)
22580 {
22581 setFlag(ALU);
22582 } // Inst_VOP3__V_CMPX_LE_I64
22583
22584 Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64()
22585 {
22586 } // ~Inst_VOP3__V_CMPX_LE_I64
22587
22588 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
22589 void
22590 Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
22591 {
22592 Wavefront *wf = gpuDynInst->wavefront();
22593 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22594 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22595 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22596
22597 src0.readSrc();
22598 src1.readSrc();
22599
22600 /**
22601 * input modifiers are supported by FP operations only
22602 */
22603 assert(!(instData.ABS & 0x1));
22604 assert(!(instData.ABS & 0x2));
22605 assert(!(instData.ABS & 0x4));
22606 assert(!(extData.NEG & 0x1));
22607 assert(!(extData.NEG & 0x2));
22608 assert(!(extData.NEG & 0x4));
22609
22610 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22611 if (wf->execMask(lane)) {
22612 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22613 }
22614 }
22615
22616 wf->execMask() = sdst.rawData();
22617 sdst.write();
22618 }
22619
22620 Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64(
22621 InFmt_VOP3 *iFmt)
22622 : Inst_VOP3(iFmt, "v_cmpx_gt_i64", true)
22623 {
22624 setFlag(ALU);
22625 } // Inst_VOP3__V_CMPX_GT_I64
22626
22627 Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64()
22628 {
22629 } // ~Inst_VOP3__V_CMPX_GT_I64
22630
22631 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22632 void
22633 Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
22634 {
22635 Wavefront *wf = gpuDynInst->wavefront();
22636 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22637 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22638 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22639
22640 src0.readSrc();
22641 src1.readSrc();
22642
22643 /**
22644 * input modifiers are supported by FP operations only
22645 */
22646 assert(!(instData.ABS & 0x1));
22647 assert(!(instData.ABS & 0x2));
22648 assert(!(instData.ABS & 0x4));
22649 assert(!(extData.NEG & 0x1));
22650 assert(!(extData.NEG & 0x2));
22651 assert(!(extData.NEG & 0x4));
22652
22653 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22654 if (wf->execMask(lane)) {
22655 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22656 }
22657 }
22658
22659 wf->execMask() = sdst.rawData();
22660 sdst.write();
22661 }
22662
22663 Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64(
22664 InFmt_VOP3 *iFmt)
22665 : Inst_VOP3(iFmt, "v_cmpx_ne_i64", true)
22666 {
22667 setFlag(ALU);
22668 } // Inst_VOP3__V_CMPX_NE_I64
22669
22670 Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64()
22671 {
22672 } // ~Inst_VOP3__V_CMPX_NE_I64
22673
22674 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22675 void
22676 Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
22677 {
22678 Wavefront *wf = gpuDynInst->wavefront();
22679 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22680 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22681 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22682
22683 src0.readSrc();
22684 src1.readSrc();
22685
22686 /**
22687 * input modifiers are supported by FP operations only
22688 */
22689 assert(!(instData.ABS & 0x1));
22690 assert(!(instData.ABS & 0x2));
22691 assert(!(instData.ABS & 0x4));
22692 assert(!(extData.NEG & 0x1));
22693 assert(!(extData.NEG & 0x2));
22694 assert(!(extData.NEG & 0x4));
22695
22696 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22697 if (wf->execMask(lane)) {
22698 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
22699 }
22700 }
22701
22702 wf->execMask() = sdst.rawData();
22703 sdst.write();
22704 }
22705
22706 Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64(
22707 InFmt_VOP3 *iFmt)
22708 : Inst_VOP3(iFmt, "v_cmpx_ge_i64", true)
22709 {
22710 setFlag(ALU);
22711 } // Inst_VOP3__V_CMPX_GE_I64
22712
22713 Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64()
22714 {
22715 } // ~Inst_VOP3__V_CMPX_GE_I64
22716
22717 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
22718 void
22719 Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
22720 {
22721 Wavefront *wf = gpuDynInst->wavefront();
22722 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22723 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22724 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22725
22726 src0.readSrc();
22727 src1.readSrc();
22728
22729 /**
22730 * input modifiers are supported by FP operations only
22731 */
22732 assert(!(instData.ABS & 0x1));
22733 assert(!(instData.ABS & 0x2));
22734 assert(!(instData.ABS & 0x4));
22735 assert(!(extData.NEG & 0x1));
22736 assert(!(extData.NEG & 0x2));
22737 assert(!(extData.NEG & 0x4));
22738
22739 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22740 if (wf->execMask(lane)) {
22741 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
22742 }
22743 }
22744
22745 wf->execMask() = sdst.rawData();
22746 sdst.write();
22747 }
22748
22749 Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64(
22750 InFmt_VOP3 *iFmt)
22751 : Inst_VOP3(iFmt, "v_cmpx_t_i64", true)
22752 {
22753 setFlag(ALU);
22754 } // Inst_VOP3__V_CMPX_T_I64
22755
22756 Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64()
22757 {
22758 } // ~Inst_VOP3__V_CMPX_T_I64
22759
22760 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
22761 void
22762 Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
22763 {
22764 Wavefront *wf = gpuDynInst->wavefront();
22765 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22766
22767 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22768 if (wf->execMask(lane)) {
22769 sdst.setBit(lane, 1);
22770 }
22771 }
22772
22773 wf->execMask() = sdst.rawData();
22774 sdst.write();
22775 }
22776
22777 Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64(
22778 InFmt_VOP3 *iFmt)
22779 : Inst_VOP3(iFmt, "v_cmpx_f_u64", true)
22780 {
22781 setFlag(ALU);
22782 } // Inst_VOP3__V_CMPX_F_U64
22783
22784 Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64()
22785 {
22786 } // ~Inst_VOP3__V_CMPX_F_U64
22787
22788 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
22789 void
22790 Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
22791 {
22792 Wavefront *wf = gpuDynInst->wavefront();
22793 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22794
22795 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22796 if (wf->execMask(lane)) {
22797 sdst.setBit(lane, 0);
22798 }
22799 }
22800
22801 wf->execMask() = sdst.rawData();
22802 sdst.write();
22803 }
22804
22805 Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64(
22806 InFmt_VOP3 *iFmt)
22807 : Inst_VOP3(iFmt, "v_cmpx_lt_u64", true)
22808 {
22809 setFlag(ALU);
22810 } // Inst_VOP3__V_CMPX_LT_U64
22811
22812 Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64()
22813 {
22814 } // ~Inst_VOP3__V_CMPX_LT_U64
22815
22816 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
22817 void
22818 Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
22819 {
22820 Wavefront *wf = gpuDynInst->wavefront();
22821 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22822 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22823 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22824
22825 src0.readSrc();
22826 src1.readSrc();
22827
22828 /**
22829 * input modifiers are supported by FP operations only
22830 */
22831 assert(!(instData.ABS & 0x1));
22832 assert(!(instData.ABS & 0x2));
22833 assert(!(instData.ABS & 0x4));
22834 assert(!(extData.NEG & 0x1));
22835 assert(!(extData.NEG & 0x2));
22836 assert(!(extData.NEG & 0x4));
22837
22838 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22839 if (wf->execMask(lane)) {
22840 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
22841 }
22842 }
22843
22844 wf->execMask() = sdst.rawData();
22845 sdst.write();
22846 }
22847
22848 Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64(
22849 InFmt_VOP3 *iFmt)
22850 : Inst_VOP3(iFmt, "v_cmpx_eq_u64", true)
22851 {
22852 setFlag(ALU);
22853 } // Inst_VOP3__V_CMPX_EQ_U64
22854
22855 Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64()
22856 {
22857 } // ~Inst_VOP3__V_CMPX_EQ_U64
22858
22859 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
22860 void
22861 Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
22862 {
22863 Wavefront *wf = gpuDynInst->wavefront();
22864 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22865 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22866 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22867
22868 src0.readSrc();
22869 src1.readSrc();
22870
22871 /**
22872 * input modifiers are supported by FP operations only
22873 */
22874 assert(!(instData.ABS & 0x1));
22875 assert(!(instData.ABS & 0x2));
22876 assert(!(instData.ABS & 0x4));
22877 assert(!(extData.NEG & 0x1));
22878 assert(!(extData.NEG & 0x2));
22879 assert(!(extData.NEG & 0x4));
22880
22881 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22882 if (wf->execMask(lane)) {
22883 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
22884 }
22885 }
22886
22887 wf->execMask() = sdst.rawData();
22888 sdst.write();
22889 }
22890
22891 Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64(
22892 InFmt_VOP3 *iFmt)
22893 : Inst_VOP3(iFmt, "v_cmpx_le_u64", true)
22894 {
22895 setFlag(ALU);
22896 } // Inst_VOP3__V_CMPX_LE_U64
22897
22898 Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64()
22899 {
22900 } // ~Inst_VOP3__V_CMPX_LE_U64
22901
22902 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
22903 void
22904 Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
22905 {
22906 Wavefront *wf = gpuDynInst->wavefront();
22907 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22908 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22909 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22910
22911 src0.readSrc();
22912 src1.readSrc();
22913
22914 /**
22915 * input modifiers are supported by FP operations only
22916 */
22917 assert(!(instData.ABS & 0x1));
22918 assert(!(instData.ABS & 0x2));
22919 assert(!(instData.ABS & 0x4));
22920 assert(!(extData.NEG & 0x1));
22921 assert(!(extData.NEG & 0x2));
22922 assert(!(extData.NEG & 0x4));
22923
22924 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22925 if (wf->execMask(lane)) {
22926 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22927 }
22928 }
22929
22930 wf->execMask() = sdst.rawData();
22931 sdst.write();
22932 }
22933
22934 Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64(
22935 InFmt_VOP3 *iFmt)
22936 : Inst_VOP3(iFmt, "v_cmpx_gt_u64", true)
22937 {
22938 setFlag(ALU);
22939 } // Inst_VOP3__V_CMPX_GT_U64
22940
22941 Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64()
22942 {
22943 } // ~Inst_VOP3__V_CMPX_GT_U64
22944
22945 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22946 void
22947 Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
22948 {
22949 Wavefront *wf = gpuDynInst->wavefront();
22950 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22951 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22952 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22953
22954 src0.readSrc();
22955 src1.readSrc();
22956
22957 /**
22958 * input modifiers are supported by FP operations only
22959 */
22960 assert(!(instData.ABS & 0x1));
22961 assert(!(instData.ABS & 0x2));
22962 assert(!(instData.ABS & 0x4));
22963 assert(!(extData.NEG & 0x1));
22964 assert(!(extData.NEG & 0x2));
22965 assert(!(extData.NEG & 0x4));
22966
22967 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22968 if (wf->execMask(lane)) {
22969 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22970 }
22971 }
22972
22973 wf->execMask() = sdst.rawData();
22974 sdst.write();
22975 }
22976
22977 Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64(
22978 InFmt_VOP3 *iFmt)
22979 : Inst_VOP3(iFmt, "v_cmpx_ne_u64", true)
22980 {
22981 setFlag(ALU);
22982 } // Inst_VOP3__V_CMPX_NE_U64
22983
22984 Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64()
22985 {
22986 } // ~Inst_VOP3__V_CMPX_NE_U64
22987
22988 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22989 void
22990 Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
22991 {
22992 Wavefront *wf = gpuDynInst->wavefront();
22993 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22994 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22995 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22996
22997 src0.readSrc();
22998 src1.readSrc();
22999
23000 /**
23001 * input modifiers are supported by FP operations only
23002 */
23003 assert(!(instData.ABS & 0x1));
23004 assert(!(instData.ABS & 0x2));
23005 assert(!(instData.ABS & 0x4));
23006 assert(!(extData.NEG & 0x1));
23007 assert(!(extData.NEG & 0x2));
23008 assert(!(extData.NEG & 0x4));
23009
23010 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23011 if (wf->execMask(lane)) {
23012 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
23013 }
23014 }
23015
23016 wf->execMask() = sdst.rawData();
23017 sdst.write();
23018 }
23019
23020 Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64(
23021 InFmt_VOP3 *iFmt)
23022 : Inst_VOP3(iFmt, "v_cmpx_ge_u64", true)
23023 {
23024 setFlag(ALU);
23025 } // Inst_VOP3__V_CMPX_GE_U64
23026
23027 Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64()
23028 {
23029 } // ~Inst_VOP3__V_CMPX_GE_U64
23030
23031 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
23032 void
23033 Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
23034 {
23035 Wavefront *wf = gpuDynInst->wavefront();
23036 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
23037 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
23038 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
23039
23040 src0.readSrc();
23041 src1.readSrc();
23042
23043 /**
23044 * input modifiers are supported by FP operations only
23045 */
23046 assert(!(instData.ABS & 0x1));
23047 assert(!(instData.ABS & 0x2));
23048 assert(!(instData.ABS & 0x4));
23049 assert(!(extData.NEG & 0x1));
23050 assert(!(extData.NEG & 0x2));
23051 assert(!(extData.NEG & 0x4));
23052
23053 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23054 if (wf->execMask(lane)) {
23055 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
23056 }
23057 }
23058
23059 wf->execMask() = sdst.rawData();
23060 sdst.write();
23061 }
23062
23063 Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64(
23064 InFmt_VOP3 *iFmt)
23065 : Inst_VOP3(iFmt, "v_cmpx_t_u64", true)
23066 {
23067 setFlag(ALU);
23068 } // Inst_VOP3__V_CMPX_T_U64
23069
23070 Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64()
23071 {
23072 } // ~Inst_VOP3__V_CMPX_T_U64
23073
23074 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
23075 void
23076 Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
23077 {
23078 Wavefront *wf = gpuDynInst->wavefront();
23079 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
23080
23081 /**
23082 * input modifiers are supported by FP operations only
23083 */
23084 assert(!(instData.ABS & 0x1));
23085 assert(!(instData.ABS & 0x2));
23086 assert(!(instData.ABS & 0x4));
23087 assert(!(extData.NEG & 0x1));
23088 assert(!(extData.NEG & 0x2));
23089 assert(!(extData.NEG & 0x4));
23090
23091 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23092 if (wf->execMask(lane)) {
23093 sdst.setBit(lane, 1);
23094 }
23095 }
23096
23097 wf->execMask() = sdst.rawData();
23098 sdst.write();
23099 }
23100
23101 Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3 *iFmt)
23102 : Inst_VOP3(iFmt, "v_cndmask_b32", false)
23103 {
23104 setFlag(ALU);
23105 setFlag(ReadsVCC);
23106 } // Inst_VOP3__V_CNDMASK_B32
23107
23108 Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32()
23109 {
23110 } // ~Inst_VOP3__V_CNDMASK_B32
23111
23112 // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
23113 // as a scalar GPR in S2.
23114 void
23115 Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
23116 {
23117 Wavefront *wf = gpuDynInst->wavefront();
23118 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23119 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23120 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
23121 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23122
23123 src0.readSrc();
23124 src1.readSrc();
23125 vcc.read();
23126
23127 /**
23128 * input modifiers are supported by FP operations only
23129 */
23130 assert(!(instData.ABS & 0x1));
23131 assert(!(instData.ABS & 0x2));
23132 assert(!(instData.ABS & 0x4));
23133 assert(!(extData.NEG & 0x1));
23134 assert(!(extData.NEG & 0x2));
23135 assert(!(extData.NEG & 0x4));
23136
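        // per-lane select: bit 'lane' of the scalar condition (VCC, or
        // the SGPR pair named by SRC2 in the VOP3 encoding) picks src1
        // when set and src0 when clear.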
23137 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23138 if (wf->execMask(lane)) {
23139 vdst[lane] = bits(vcc.rawData(), lane)
23140 ? src1[lane] : src0[lane];
23141 }
23142 }
23143
23144 vdst.write();
23145 }
23146
23147 Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3 *iFmt)
23148 : Inst_VOP3(iFmt, "v_add_f32", false)
23149 {
23150 setFlag(ALU);
23151 setFlag(F32);
23152 } // Inst_VOP3__V_ADD_F32
23153
23154 Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32()
23155 {
23156 } // ~Inst_VOP3__V_ADD_F32
23157
23158 // D.f = S0.f + S1.f.
23159 void
23160 Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
23161 {
23162 Wavefront *wf = gpuDynInst->wavefront();
23163 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23164 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23165 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23166
23167 src0.readSrc();
23168 src1.readSrc();
23169
23170 if (instData.ABS & 0x1) {
23171 src0.absModifier();
23172 }
23173
23174 if (instData.ABS & 0x2) {
23175 src1.absModifier();
23176 }
23177
23178 if (extData.NEG & 0x1) {
23179 src0.negModifier();
23180 }
23181
23182 if (extData.NEG & 0x2) {
23183 src1.negModifier();
23184 }
23185
23186 /**
23187 * input modifiers are supported by FP operations only
23188 */
23189 assert(!(instData.ABS & 0x4));
23190 assert(!(extData.NEG & 0x4));
23191
23192 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23193 if (wf->execMask(lane)) {
23194 vdst[lane] = src0[lane] + src1[lane];
23195 }
23196 }
23197
23198 vdst.write();
23199 }
23200
23201 Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3 *iFmt)
23202 : Inst_VOP3(iFmt, "v_sub_f32", false)
23203 {
23204 setFlag(ALU);
23205 setFlag(F32);
23206 } // Inst_VOP3__V_SUB_F32
23207
23208 Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32()
23209 {
23210 } // ~Inst_VOP3__V_SUB_F32
23211
23212 // D.f = S0.f - S1.f.
23213 void
23214 Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
23215 {
23216 Wavefront *wf = gpuDynInst->wavefront();
23217 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23218 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23219 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23220
23221 src0.readSrc();
23222 src1.readSrc();
23223
23224 if (instData.ABS & 0x1) {
23225 src0.absModifier();
23226 }
23227
23228 if (instData.ABS & 0x2) {
23229 src1.absModifier();
23230 }
23231
23232 if (extData.NEG & 0x1) {
23233 src0.negModifier();
23234 }
23235
23236 if (extData.NEG & 0x2) {
23237 src1.negModifier();
23238 }
23239
23240 /**
23241 * input modifiers are supported by FP operations only
23242 */
23243 assert(!(instData.ABS & 0x4));
23244 assert(!(extData.NEG & 0x4));
23245
23246 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23247 if (wf->execMask(lane)) {
23248 vdst[lane] = src0[lane] - src1[lane];
23249 }
23250 }
23251
23252 vdst.write();
23253 }
23254
23255 Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3 *iFmt)
23256 : Inst_VOP3(iFmt, "v_subrev_f32", false)
23257 {
23258 setFlag(ALU);
23259 setFlag(F32);
23260 } // Inst_VOP3__V_SUBREV_F32
23261
23262 Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32()
23263 {
23264 } // ~Inst_VOP3__V_SUBREV_F32
23265
23266 // D.f = S1.f - S0.f.
23267 void
23268 Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
23269 {
23270 Wavefront *wf = gpuDynInst->wavefront();
23271 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23272 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23273 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23274
23275 src0.readSrc();
23276 src1.readSrc();
23277
23278 if (instData.ABS & 0x1) {
23279 src0.absModifier();
23280 }
23281
23282 if (instData.ABS & 0x2) {
23283 src1.absModifier();
23284 }
23285
23286 if (extData.NEG & 0x1) {
23287 src0.negModifier();
23288 }
23289
23290 if (extData.NEG & 0x2) {
23291 src1.negModifier();
23292 }
23293
23294 /**
23295 * input modifiers are supported by FP operations only
23296 */
23297 assert(!(instData.ABS & 0x4));
23298 assert(!(extData.NEG & 0x4));
23299
23300 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23301 if (wf->execMask(lane)) {
23302 vdst[lane] = src1[lane] - src0[lane];
23303 }
23304 }
23305
23306 vdst.write();
23307 }
23308
23309 Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3 *iFmt)
23310 : Inst_VOP3(iFmt, "v_mul_legacy_f32", false)
23311 {
23312 setFlag(ALU);
23313 setFlag(F32);
23314 } // Inst_VOP3__V_MUL_LEGACY_F32
23315
23316 Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32()
23317 {
23318 } // ~Inst_VOP3__V_MUL_LEGACY_F32
23319
23320 // D.f = S0.f * S1.f
23321 void
23322 Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
23323 {
23324 Wavefront *wf = gpuDynInst->wavefront();
23325 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23326 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23327 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23328
23329 src0.readSrc();
23330 src1.readSrc();
23331
23332 if (instData.ABS & 0x1) {
23333 src0.absModifier();
23334 }
23335
23336 if (instData.ABS & 0x2) {
23337 src1.absModifier();
23338 }
23339
23340 if (extData.NEG & 0x1) {
23341 src0.negModifier();
23342 }
23343
23344 if (extData.NEG & 0x2) {
23345 src1.negModifier();
23346 }
23347
23348 /**
23349 * input modifiers are supported by FP operations only
23350 */
23351 assert(!(instData.ABS & 0x4));
23352 assert(!(extData.NEG & 0x4));
23353
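        // special cases of the legacy (DX9-style) multiply as modeled
        // here: NaN inputs propagate; a zero or denormal src0 yields a
        // signed zero unless src1 is infinite (NaN); an infinite src0
        // yields a signed infinity unless src1 is zero/denormal (NaN).
        // In each case the result sign is the XOR of the operand signs.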
23354 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23355 if (wf->execMask(lane)) {
23356 if (std::isnan(src0[lane]) ||
23357 std::isnan(src1[lane])) {
23358 vdst[lane] = NAN;
23359 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23360 std::fpclassify(src0[lane]) == FP_ZERO) &&
23361 !std::signbit(src0[lane])) {
23362 if (std::isinf(src1[lane])) {
23363 vdst[lane] = NAN;
23364 } else if (!std::signbit(src1[lane])) {
23365 vdst[lane] = +0.0;
23366 } else {
23367 vdst[lane] = -0.0;
23368 }
23369 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23370 std::fpclassify(src0[lane]) == FP_ZERO) &&
23371 std::signbit(src0[lane])) {
23372 if (std::isinf(src1[lane])) {
23373 vdst[lane] = NAN;
23374 } else if (std::signbit(src1[lane])) {
23375 vdst[lane] = +0.0;
23376 } else {
23377 vdst[lane] = -0.0;
23378 }
23379 } else if (std::isinf(src0[lane]) &&
23380 !std::signbit(src0[lane])) {
23381 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23382 std::fpclassify(src1[lane]) == FP_ZERO) {
23383 vdst[lane] = NAN;
23384 } else if (!std::signbit(src1[lane])) {
23385 vdst[lane] = +INFINITY;
23386 } else {
23387 vdst[lane] = -INFINITY;
23388 }
23389 } else if (std::isinf(src0[lane]) &&
23390 std::signbit(src0[lane])) {
23391 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23392 std::fpclassify(src1[lane]) == FP_ZERO) {
23393 vdst[lane] = NAN;
23394 } else if (std::signbit(src1[lane])) {
23395 vdst[lane] = +INFINITY;
23396 } else {
23397 vdst[lane] = -INFINITY;
23398 }
23399 } else {
23400 vdst[lane] = src0[lane] * src1[lane];
23401 }
23402 }
23403 }
23404
23405 vdst.write();
23406 }
23407
23408 Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3 *iFmt)
23409 : Inst_VOP3(iFmt, "v_mul_f32", false)
23410 {
23411 setFlag(ALU);
23412 setFlag(F32);
23413 } // Inst_VOP3__V_MUL_F32
23414
23415 Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
23416 {
23417 } // ~Inst_VOP3__V_MUL_F32
23418
23419 // D.f = S0.f * S1.f.
23420 void
23421 Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
23422 {
23423 Wavefront *wf = gpuDynInst->wavefront();
23424 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23425 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23426 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23427
23428 src0.readSrc();
23429 src1.readSrc();
23430
23431 if (instData.ABS & 0x1) {
23432 src0.absModifier();
23433 }
23434
23435 if (instData.ABS & 0x2) {
23436 src1.absModifier();
23437 }
23438
23439 if (extData.NEG & 0x1) {
23440 src0.negModifier();
23441 }
23442
23443 if (extData.NEG & 0x2) {
23444 src1.negModifier();
23445 }
23446
23447 /**
23448 * input modifiers are supported by FP operations only
23449 */
23450 assert(!(instData.ABS & 0x4));
23451 assert(!(extData.NEG & 0x4));
23452
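        // same special-case chain as v_mul_legacy_f32 above: NaN
        // propagation, 0 * inf -> NaN, and a zero/denormal src0
        // producing a signed zero.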
23453 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23454 if (wf->execMask(lane)) {
23455 if (std::isnan(src0[lane]) ||
23456 std::isnan(src1[lane])) {
23457 vdst[lane] = NAN;
23458 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23459 std::fpclassify(src0[lane]) == FP_ZERO) &&
23460 !std::signbit(src0[lane])) {
23461 if (std::isinf(src1[lane])) {
23462 vdst[lane] = NAN;
23463 } else if (!std::signbit(src1[lane])) {
23464 vdst[lane] = +0.0;
23465 } else {
23466 vdst[lane] = -0.0;
23467 }
23468 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23469 std::fpclassify(src0[lane]) == FP_ZERO) &&
23470 std::signbit(src0[lane])) {
23471 if (std::isinf(src1[lane])) {
23472 vdst[lane] = NAN;
23473 } else if (std::signbit(src1[lane])) {
23474 vdst[lane] = +0.0;
23475 } else {
23476 vdst[lane] = -0.0;
23477 }
23478 } else if (std::isinf(src0[lane]) &&
23479 !std::signbit(src0[lane])) {
23480 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23481 std::fpclassify(src1[lane]) == FP_ZERO) {
23482 vdst[lane] = NAN;
23483 } else if (!std::signbit(src1[lane])) {
23484 vdst[lane] = +INFINITY;
23485 } else {
23486 vdst[lane] = -INFINITY;
23487 }
23488 } else if (std::isinf(src0[lane]) &&
23489 std::signbit(src0[lane])) {
23490 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23491 std::fpclassify(src1[lane]) == FP_ZERO) {
23492 vdst[lane] = NAN;
23493 } else if (std::signbit(src1[lane])) {
23494 vdst[lane] = +INFINITY;
23495 } else {
23496 vdst[lane] = -INFINITY;
23497 }
23498 } else {
23499 vdst[lane] = src0[lane] * src1[lane];
23500 }
23501 }
23502 }
23503
23504 vdst.write();
23505 }
23506
23507 Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3 *iFmt)
23508 : Inst_VOP3(iFmt, "v_mul_i32_i24", false)
23509 {
23510 setFlag(ALU);
23511 } // Inst_VOP3__V_MUL_I32_I24
23512
23513 Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24()
23514 {
23515 } // ~Inst_VOP3__V_MUL_I32_I24
23516
23517 // D.i = S0.i[23:0] * S1.i[23:0].
23518 void
23519 Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
23520 {
23521 Wavefront *wf = gpuDynInst->wavefront();
23522 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23523 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23524 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23525
23526 src0.readSrc();
23527         src1.readSrc();
23528
23529 /**
23530 * input modifiers are supported by FP operations only
23531 */
23532 assert(!(instData.ABS & 0x1));
23533 assert(!(instData.ABS & 0x2));
23534 assert(!(instData.ABS & 0x4));
23535 assert(!(extData.NEG & 0x1));
23536 assert(!(extData.NEG & 0x2));
23537 assert(!(extData.NEG & 0x4));
23538
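        // each lane multiplies the sign-extended low 24 bits of the
        // sources; the product is truncated to 32 bits on assignment
        // to the 32-bit destination.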
23539 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23540 if (wf->execMask(lane)) {
23541 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
23542 * sext<24>(bits(src1[lane], 23, 0));
23543 }
23544 }
23545
23546 vdst.write();
23547 }
23548
23549 Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3 *iFmt)
23550 : Inst_VOP3(iFmt, "v_mul_hi_i32_i24", false)
23551 {
23552 setFlag(ALU);
23553 } // Inst_VOP3__V_MUL_HI_I32_I24
23554
23555 Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24()
23556 {
23557 } // ~Inst_VOP3__V_MUL_HI_I32_I24
23558
23559 // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
23560 void
23561 Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
23562 {
23563 Wavefront *wf = gpuDynInst->wavefront();
23564 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23565 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23566 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23567
23568 src0.readSrc();
23569 src1.readSrc();
23570
23571 /**
23572 * input modifiers are supported by FP operations only
23573 */
23574 assert(!(instData.ABS & 0x1));
23575 assert(!(instData.ABS & 0x2));
23576 assert(!(instData.ABS & 0x4));
23577 assert(!(extData.NEG & 0x1));
23578 assert(!(extData.NEG & 0x2));
23579 assert(!(extData.NEG & 0x4));
23580
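        // widen the sign-extended 24-bit operands to 64 bits so the
        // full 48-bit product is formed before taking bits [63:32].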
23581 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23582 if (wf->execMask(lane)) {
23583 VecElemI64 tmp_src0
23584 = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
23585 VecElemI64 tmp_src1
23586 = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
23587
23588 vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
23589 }
23590 }
23591
23592 vdst.write();
23593 }
23594
23595 Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3 *iFmt)
23596 : Inst_VOP3(iFmt, "v_mul_u32_u24", false)
23597 {
23598 setFlag(ALU);
23599 } // Inst_VOP3__V_MUL_U32_U24
23600
23601 Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24()
23602 {
23603 } // ~Inst_VOP3__V_MUL_U32_U24
23604
23605 // D.u = S0.u[23:0] * S1.u[23:0].
23606 void
23607 Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
23608 {
23609 Wavefront *wf = gpuDynInst->wavefront();
23610 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23611 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23612 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23613
23614 src0.readSrc();
23615 src1.readSrc();
23616
23617 /**
23618 * input modifiers are supported by FP operations only
23619 */
23620 assert(!(instData.ABS & 0x1));
23621 assert(!(instData.ABS & 0x2));
23622 assert(!(instData.ABS & 0x4));
23623 assert(!(extData.NEG & 0x1));
23624 assert(!(extData.NEG & 0x2));
23625 assert(!(extData.NEG & 0x4));
23626
23627 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23628 if (wf->execMask(lane)) {
23629 vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
23630 }
23631 }
23632
23633 vdst.write();
23634 }
23635
23636 Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3 *iFmt)
23637 : Inst_VOP3(iFmt, "v_mul_hi_u32_u24", false)
23638 {
23639 setFlag(ALU);
23640 } // Inst_VOP3__V_MUL_HI_U32_U24
23641
23642 Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24()
23643 {
23644 } // ~Inst_VOP3__V_MUL_HI_U32_U24
23645
23646     // D.u = (S0.u[23:0] * S1.u[23:0]) >> 32.
23647 void
23648 Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
23649 {
23650 Wavefront *wf = gpuDynInst->wavefront();
23651 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23652 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23653 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23654
23655 src0.readSrc();
23656 src1.readSrc();
23657
23658 /**
23659 * input modifiers are supported by FP operations only
23660 */
23661 assert(!(instData.ABS & 0x1));
23662 assert(!(instData.ABS & 0x2));
23663 assert(!(instData.ABS & 0x4));
23664 assert(!(extData.NEG & 0x1));
23665 assert(!(extData.NEG & 0x2));
23666 assert(!(extData.NEG & 0x4));
23667
23668 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23669 if (wf->execMask(lane)) {
23670 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
23671 VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
23672 vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
23673 }
23674 }
23675
23676 vdst.write();
23677 }
23678
23679 Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3 *iFmt)
23680 : Inst_VOP3(iFmt, "v_min_f32", false)
23681 {
23682 setFlag(ALU);
23683 setFlag(F32);
23684 } // Inst_VOP3__V_MIN_F32
23685
23686 Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32()
23687 {
23688 } // ~Inst_VOP3__V_MIN_F32
23689
23690 // D.f = (S0.f < S1.f ? S0.f : S1.f).
23691 void
23692 Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
23693 {
23694 Wavefront *wf = gpuDynInst->wavefront();
23695 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23696 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23697 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23698
23699 src0.readSrc();
23700 src1.readSrc();
23701
23702 if (instData.ABS & 0x1) {
23703 src0.absModifier();
23704 }
23705
23706 if (instData.ABS & 0x2) {
23707 src1.absModifier();
23708 }
23709
23710 if (extData.NEG & 0x1) {
23711 src0.negModifier();
23712 }
23713
23714 if (extData.NEG & 0x2) {
23715 src1.negModifier();
23716 }
23717
23718 /**
23719 * input modifiers are supported by FP operations only
23720 */
23721 assert(!(instData.ABS & 0x4));
23722 assert(!(extData.NEG & 0x4));
23723
23724 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23725 if (wf->execMask(lane)) {
23726 vdst[lane] = std::fmin(src0[lane], src1[lane]);
23727 }
23728 }
23729
23730 vdst.write();
23731 }
23732
23733 Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3 *iFmt)
23734 : Inst_VOP3(iFmt, "v_max_f32", false)
23735 {
23736 setFlag(ALU);
23737 setFlag(F32);
23738 } // Inst_VOP3__V_MAX_F32
23739
23740 Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32()
23741 {
23742 } // ~Inst_VOP3__V_MAX_F32
23743
23744 // D.f = (S0.f >= S1.f ? S0.f : S1.f).
23745 void
23746 Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
23747 {
23748 Wavefront *wf = gpuDynInst->wavefront();
23749 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23750 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23751 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23752
23753 src0.readSrc();
23754 src1.readSrc();
23755
23756 if (instData.ABS & 0x1) {
23757 src0.absModifier();
23758 }
23759
23760 if (instData.ABS & 0x2) {
23761 src1.absModifier();
23762 }
23763
23764 if (extData.NEG & 0x1) {
23765 src0.negModifier();
23766 }
23767
23768 if (extData.NEG & 0x2) {
23769 src1.negModifier();
23770 }
23771
23772 /**
23773 * input modifiers are supported by FP operations only
23774 */
23775 assert(!(instData.ABS & 0x4));
23776 assert(!(extData.NEG & 0x4));
23777
23778 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23779 if (wf->execMask(lane)) {
23780 vdst[lane] = std::fmax(src0[lane], src1[lane]);
23781 }
23782 }
23783
23784 vdst.write();
23785 }
23786
23787 Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3 *iFmt)
23788 : Inst_VOP3(iFmt, "v_min_i32", false)
23789 {
23790 setFlag(ALU);
23791 } // Inst_VOP3__V_MIN_I32
23792
23793 Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32()
23794 {
23795 } // ~Inst_VOP3__V_MIN_I32
23796
23797 // D.i = min(S0.i, S1.i).
23798 void
23799 Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
23800 {
23801 Wavefront *wf = gpuDynInst->wavefront();
23802 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23803 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23804 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23805
23806 src0.readSrc();
23807 src1.readSrc();
23808
23809 /**
23810 * input modifiers are supported by FP operations only
23811 */
23812 assert(!(instData.ABS & 0x1));
23813 assert(!(instData.ABS & 0x2));
23814 assert(!(instData.ABS & 0x4));
23815 assert(!(extData.NEG & 0x1));
23816 assert(!(extData.NEG & 0x2));
23817 assert(!(extData.NEG & 0x4));
23818
23819 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23820 if (wf->execMask(lane)) {
23821 vdst[lane] = std::min(src0[lane], src1[lane]);
23822 }
23823 }
23824
23825 vdst.write();
23826 }
23827
23828 Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3 *iFmt)
23829 : Inst_VOP3(iFmt, "v_max_i32", false)
23830 {
23831 setFlag(ALU);
23832 } // Inst_VOP3__V_MAX_I32
23833
23834 Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32()
23835 {
23836 } // ~Inst_VOP3__V_MAX_I32
23837
23838 // D.i = max(S0.i, S1.i).
23839 void
23840 Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
23841 {
23842 Wavefront *wf = gpuDynInst->wavefront();
23843 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23844 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23845 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23846
23847 src0.readSrc();
23848 src1.readSrc();
23849
23850 /**
23851 * input modifiers are supported by FP operations only
23852 */
23853 assert(!(instData.ABS & 0x1));
23854 assert(!(instData.ABS & 0x2));
23855 assert(!(instData.ABS & 0x4));
23856 assert(!(extData.NEG & 0x1));
23857 assert(!(extData.NEG & 0x2));
23858 assert(!(extData.NEG & 0x4));
23859
23860 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23861 if (wf->execMask(lane)) {
23862 vdst[lane] = std::max(src0[lane], src1[lane]);
23863 }
23864 }
23865
23866 vdst.write();
23867 }
23868
23869 Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3 *iFmt)
23870 : Inst_VOP3(iFmt, "v_min_u32", false)
23871 {
23872 setFlag(ALU);
23873 } // Inst_VOP3__V_MIN_U32
23874
23875 Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32()
23876 {
23877 } // ~Inst_VOP3__V_MIN_U32
23878
23879 // D.u = min(S0.u, S1.u).
23880 void
23881 Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
23882 {
23883 Wavefront *wf = gpuDynInst->wavefront();
23884 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23885 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23886 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23887
23888 src0.readSrc();
23889 src1.readSrc();
23890
23891 /**
23892 * input modifiers are supported by FP operations only
23893 */
23894 assert(!(instData.ABS & 0x1));
23895 assert(!(instData.ABS & 0x2));
23896 assert(!(instData.ABS & 0x4));
23897 assert(!(extData.NEG & 0x1));
23898 assert(!(extData.NEG & 0x2));
23899 assert(!(extData.NEG & 0x4));
23900
23901 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23902 if (wf->execMask(lane)) {
23903 vdst[lane] = std::min(src0[lane], src1[lane]);
23904 }
23905 }
23906
23907 vdst.write();
23908 }
23909
23910 Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3 *iFmt)
23911 : Inst_VOP3(iFmt, "v_max_u32", false)
23912 {
23913 setFlag(ALU);
23914 } // Inst_VOP3__V_MAX_U32
23915
23916 Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32()
23917 {
23918 } // ~Inst_VOP3__V_MAX_U32
23919
23920 // D.u = max(S0.u, S1.u).
23921 void
23922 Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
23923 {
23924 Wavefront *wf = gpuDynInst->wavefront();
23925 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23926 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23927 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23928
23929 src0.readSrc();
23930 src1.readSrc();
23931
23932 /**
23933 * input modifiers are supported by FP operations only
23934 */
23935 assert(!(instData.ABS & 0x1));
23936 assert(!(instData.ABS & 0x2));
23937 assert(!(instData.ABS & 0x4));
23938 assert(!(extData.NEG & 0x1));
23939 assert(!(extData.NEG & 0x2));
23940 assert(!(extData.NEG & 0x4));
23941
23942 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23943 if (wf->execMask(lane)) {
23944 vdst[lane] = std::max(src0[lane], src1[lane]);
23945 }
23946 }
23947
23948 vdst.write();
23949 }
23950
23951 Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3 *iFmt)
23952 : Inst_VOP3(iFmt, "v_lshrrev_b32", false)
23953 {
23954 setFlag(ALU);
23955 } // Inst_VOP3__V_LSHRREV_B32
23956
23957 Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32()
23958 {
23959 } // ~Inst_VOP3__V_LSHRREV_B32
23960
23961 // D.u = S1.u >> S0.u[4:0].
23962 // The vacated bits are set to zero.
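    // Note the "rev" operand order shared by the *rev shifts: the shift
    // amount comes from S0 and the value being shifted from S1.
    // Illustrative example (not from the ISA manual): src0 = 0x21 and
    // src1 = 0x80000000 use only src0[4:0] = 1, so D.u = 0x40000000.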
23963 void
23964 Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
23965 {
23966 Wavefront *wf = gpuDynInst->wavefront();
23967 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23968 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23969 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23970
23971 src0.readSrc();
23972 src1.readSrc();
23973
23974 /**
23975 * input modifiers are supported by FP operations only
23976 */
23977 assert(!(instData.ABS & 0x1));
23978 assert(!(instData.ABS & 0x2));
23979 assert(!(instData.ABS & 0x4));
23980 assert(!(extData.NEG & 0x1));
23981 assert(!(extData.NEG & 0x2));
23982 assert(!(extData.NEG & 0x4));
23983
23984 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23985 if (wf->execMask(lane)) {
23986 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
23987 }
23988 }
23989
23990 vdst.write();
23991 }
23992
23993 Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3 *iFmt)
23994 : Inst_VOP3(iFmt, "v_ashrrev_i32", false)
23995 {
23996 setFlag(ALU);
23997 } // Inst_VOP3__V_ASHRREV_I32
23998
23999 Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32()
24000 {
24001 } // ~Inst_VOP3__V_ASHRREV_I32
24002
24003 // D.i = signext(S1.i) >> S0.i[4:0].
24004 // The vacated bits are set to the sign bit of the input value.
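    // Illustrative example: src1 = 0x80000000 (INT_MIN) shifted by 4
    // yields 0xF8000000. On the two's-complement compilers gem5 targets,
    // >> on a negative signed value is an arithmetic shift, matching the
    // ISA's sign-fill semantics.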
24005 void
24006 Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
24007 {
24008 Wavefront *wf = gpuDynInst->wavefront();
24009 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24010 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
24011 VecOperandI32 vdst(gpuDynInst, instData.VDST);
24012
24013 src0.readSrc();
24014 src1.readSrc();
24015
24016 /**
24017 * input modifiers are supported by FP operations only
24018 */
24019 assert(!(instData.ABS & 0x1));
24020 assert(!(instData.ABS & 0x2));
24021 assert(!(instData.ABS & 0x4));
24022 assert(!(extData.NEG & 0x1));
24023 assert(!(extData.NEG & 0x2));
24024 assert(!(extData.NEG & 0x4));
24025
24026 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24027 if (wf->execMask(lane)) {
24028 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
24029 }
24030 }
24031
24032 vdst.write();
24033 }
24034
24035 Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3 *iFmt)
24036 : Inst_VOP3(iFmt, "v_lshlrev_b32", false)
24037 {
24038 setFlag(ALU);
24039 } // Inst_VOP3__V_LSHLREV_B32
24040
24041 Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32()
24042 {
24043 } // ~Inst_VOP3__V_LSHLREV_B32
24044
24045 // D.u = S1.u << S0.u[4:0].
24046 void
24047 Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
24048 {
24049 Wavefront *wf = gpuDynInst->wavefront();
24050 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24051 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24052 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24053
24054 src0.readSrc();
24055 src1.readSrc();
24056
24057 /**
24058 * input modifiers are supported by FP operations only
24059 */
24060 assert(!(instData.ABS & 0x1));
24061 assert(!(instData.ABS & 0x2));
24062 assert(!(instData.ABS & 0x4));
24063 assert(!(extData.NEG & 0x1));
24064 assert(!(extData.NEG & 0x2));
24065 assert(!(extData.NEG & 0x4));
24066
24067 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24068 if (wf->execMask(lane)) {
24069 vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
24070 }
24071 }
24072
24073 vdst.write();
24074 }
24075
24076 Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3 *iFmt)
24077 : Inst_VOP3(iFmt, "v_and_b32", false)
24078 {
24079 setFlag(ALU);
24080 } // Inst_VOP3__V_AND_B32
24081
24082 Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32()
24083 {
24084 } // ~Inst_VOP3__V_AND_B32
24085
24086 // D.u = S0.u & S1.u.
24087 // Input and output modifiers not supported.
24088 void
24089 Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
24090 {
24091 Wavefront *wf = gpuDynInst->wavefront();
24092 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24093 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24094 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24095
24096 src0.readSrc();
24097 src1.readSrc();
24098
24099 /**
24100 * input modifiers are supported by FP operations only
24101 */
24102 assert(!(instData.ABS & 0x1));
24103 assert(!(instData.ABS & 0x2));
24104 assert(!(instData.ABS & 0x4));
24105 assert(!(extData.NEG & 0x1));
24106 assert(!(extData.NEG & 0x2));
24107 assert(!(extData.NEG & 0x4));
24108
24109 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24110 if (wf->execMask(lane)) {
24111 vdst[lane] = src0[lane] & src1[lane];
24112 }
24113 }
24114
24115 vdst.write();
24116 }
24117
24118 Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3 *iFmt)
24119 : Inst_VOP3(iFmt, "v_or_b32", false)
24120 {
24121 setFlag(ALU);
24122 } // Inst_VOP3__V_OR_B32
24123
24124 Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32()
24125 {
24126 } // ~Inst_VOP3__V_OR_B32
24127
24128 // D.u = S0.u | S1.u.
24129 // Input and output modifiers not supported.
24130 void
24131 Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
24132 {
24133 Wavefront *wf = gpuDynInst->wavefront();
24134 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24135 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24136 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24137
24138 src0.readSrc();
24139 src1.readSrc();
24140
24141 /**
24142 * input modifiers are supported by FP operations only
24143 */
24144 assert(!(instData.ABS & 0x1));
24145 assert(!(instData.ABS & 0x2));
24146 assert(!(instData.ABS & 0x4));
24147 assert(!(extData.NEG & 0x1));
24148 assert(!(extData.NEG & 0x2));
24149 assert(!(extData.NEG & 0x4));
24150
24151 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24152 if (wf->execMask(lane)) {
24153 vdst[lane] = src0[lane] | src1[lane];
24154 }
24155 }
24156
24157 vdst.write();
24158 }
24159
24160 Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3 *iFmt)
24161 : Inst_VOP3(iFmt, "v_xor_b32", false)
24162 {
24163 setFlag(ALU);
24164 } // Inst_VOP3__V_XOR_B32
24165
24166 Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32()
24167 {
24168 } // ~Inst_VOP3__V_XOR_B32
24169
24170 // D.u = S0.u ^ S1.u.
24171 // Input and output modifiers not supported.
24172 void
24173 Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
24174 {
24175 Wavefront *wf = gpuDynInst->wavefront();
24176 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24177 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24178 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24179
24180 src0.readSrc();
24181 src1.readSrc();
24182
24183 /**
24184 * input modifiers are supported by FP operations only
24185 */
24186 assert(!(instData.ABS & 0x1));
24187 assert(!(instData.ABS & 0x2));
24188 assert(!(instData.ABS & 0x4));
24189 assert(!(extData.NEG & 0x1));
24190 assert(!(extData.NEG & 0x2));
24191 assert(!(extData.NEG & 0x4));
24192
24193 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24194 if (wf->execMask(lane)) {
24195 vdst[lane] = src0[lane] ^ src1[lane];
24196 }
24197 }
24198
24199 vdst.write();
24200 }
24201
24202 Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3 *iFmt)
24203 : Inst_VOP3(iFmt, "v_mac_f32", false)
24204 {
24205 setFlag(ALU);
24206 setFlag(F32);
24207 setFlag(MAC);
24208 } // Inst_VOP3__V_MAC_F32
24209
24210 Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32()
24211 {
24212 } // ~Inst_VOP3__V_MAC_F32
24213
24214 // D.f = S0.f * S1.f + D.f.
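    // gem5 models the MAC with std::fma, which rounds the product and
    // sum once. Illustrative effect: fma(1e8f, 1e8f, -1e16f) yields the
    // nonzero rounding residue of the product, whereas a separate
    // multiply followed by an add would yield 0.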
24215 void
24216 Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
24217 {
24218 Wavefront *wf = gpuDynInst->wavefront();
24219 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
24220 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
24221 VecOperandF32 vdst(gpuDynInst, instData.VDST);
24222
24223 src0.readSrc();
24224 src1.readSrc();
24225 vdst.read();
24226
24227 if (instData.ABS & 0x1) {
24228 src0.absModifier();
24229 }
24230
24231 if (instData.ABS & 0x2) {
24232 src1.absModifier();
24233 }
24234
24235 if (extData.NEG & 0x1) {
24236 src0.negModifier();
24237 }
24238
24239 if (extData.NEG & 0x2) {
24240 src1.negModifier();
24241 }
24242
24243 /**
24244 * input modifiers are supported by FP operations only
24245 */
24246 assert(!(instData.ABS & 0x4));
24247 assert(!(extData.NEG & 0x4));
24248
24249 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24250 if (wf->execMask(lane)) {
24251 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
24252 }
24253 }
24254
24255 vdst.write();
24256 }
24257
24258 Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC *iFmt)
24259 : Inst_VOP3_SDST_ENC(iFmt, "v_add_u32")
24260 {
24261 setFlag(ALU);
24262 setFlag(WritesVCC);
24263 } // Inst_VOP3__V_ADD_U32
24264
24265 Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
24266 {
24267 } // ~Inst_VOP3__V_ADD_U32
24268
24269 // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
24272 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
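    // Worked example (illustrative): src0 = 0xFFFFFFFF and src1 = 0x2
    // produce vdst = 0x1 (modulo-2^32 wrap) and set the lane's carry
    // bit, because the 64-bit sum 0x100000001 is >= 0x100000000.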
24273 void
24274 Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
24275 {
24276 Wavefront *wf = gpuDynInst->wavefront();
24277 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24278 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24279 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24280 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
24281
24282 src0.readSrc();
24283 src1.readSrc();
24284
24285 /**
24286 * input modifiers are supported by FP operations only
24287 */
24288 assert(!(extData.NEG & 0x1));
24289 assert(!(extData.NEG & 0x2));
24290 assert(!(extData.NEG & 0x4));
24291
24292 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24293 if (wf->execMask(lane)) {
24294 vdst[lane] = src0[lane] + src1[lane];
24295 vcc.setBit(lane, ((VecElemU64)src0[lane]
24296 + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
24297 }
24298 }
24299
24300 vdst.write();
24301 vcc.write();
24302 }
24303
24304 Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC *iFmt)
24305 : Inst_VOP3_SDST_ENC(iFmt, "v_sub_u32")
24306 {
24307 setFlag(ALU);
24308 setFlag(WritesVCC);
24309 } // Inst_VOP3__V_SUB_U32
24310
24311 Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
24312 {
24313 } // ~Inst_VOP3__V_SUB_U32
24314
24315 // D.u = S0.u - S1.u;
24316 // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
24317 // carry-out.
24318 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
24319 void
24320 Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
24321 {
24322 Wavefront *wf = gpuDynInst->wavefront();
24323 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24324 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24325 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24326 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
24327
24328 src0.readSrc();
24329 src1.readSrc();
24330
24331 /**
24332 * input modifiers are supported by FP operations only
24333 */
24334 assert(!(extData.NEG & 0x1));
24335 assert(!(extData.NEG & 0x2));
24336 assert(!(extData.NEG & 0x4));
24337
24338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24339 if (wf->execMask(lane)) {
24340 vdst[lane] = src0[lane] - src1[lane];
24341 vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
24342 }
24343 }
24344
24345 vdst.write();
24346 vcc.write();
24347 }
24348
24349 Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
24350 InFmt_VOP3_SDST_ENC *iFmt)
24351 : Inst_VOP3_SDST_ENC(iFmt, "v_subrev_u32")
24352 {
24353 setFlag(ALU);
24354 setFlag(WritesVCC);
24355 } // Inst_VOP3__V_SUBREV_U32
24356
24357 Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
24358 {
24359 } // ~Inst_VOP3__V_SUBREV_U32
24360
24361 // D.u = S1.u - S0.u;
24362 // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
24363 // carry-out.
24364 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
24365 void
24366 Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
24367 {
24368 Wavefront *wf = gpuDynInst->wavefront();
24369 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24370 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24371 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24372 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
24373
24374 src0.readSrc();
24375 src1.readSrc();
24376
24377 /**
24378 * input modifiers are supported by FP operations only
24379 */
24380 assert(!(extData.NEG & 0x1));
24381 assert(!(extData.NEG & 0x2));
24382 assert(!(extData.NEG & 0x4));
24383
24384 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24385 if (wf->execMask(lane)) {
24386 vdst[lane] = src1[lane] - src0[lane];
24387 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
24388 }
24389 }
24390
24391 vdst.write();
24392 vcc.write();
24393 }
24394
24395 Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC *iFmt)
24396 : Inst_VOP3_SDST_ENC(iFmt, "v_addc_u32")
24397 {
24398 setFlag(ALU);
24399 setFlag(WritesVCC);
24400 setFlag(ReadsVCC);
24401 } // Inst_VOP3__V_ADDC_U32
24402
24403 Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
24404 {
24405 } // ~Inst_VOP3__V_ADDC_U32
24406
24407 // D.u = S0.u + S1.u + VCC[threadId];
24408 // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
24409 // is an UNSIGNED overflow.
24410 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
24411 // source comes from the SGPR-pair at S2.u.
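    // v_addc_u32 forms the upper half of a multi-precision add: the low
    // words are added with v_add_u32 (carry-out written to an SGPR pair)
    // and each higher word is added with v_addc_u32 consuming that pair
    // through SRC2 (illustrative description of the idiom).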
24412 void
24413 Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
24414 {
24415 Wavefront *wf = gpuDynInst->wavefront();
24416 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24417 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24418 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
24419 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24420 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
24421
24422 src0.readSrc();
24423 src1.readSrc();
24424 vcc.read();
24425
24426 /**
24427 * input modifiers are supported by FP operations only
24428 */
24429 assert(!(extData.NEG & 0x1));
24430 assert(!(extData.NEG & 0x2));
24431 assert(!(extData.NEG & 0x4));
24432
24433 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24434 if (wf->execMask(lane)) {
24435 vdst[lane] = src0[lane] + src1[lane]
24436 + bits(vcc.rawData(), lane);
24437 sdst.setBit(lane, ((VecElemU64)src0[lane]
24438 + (VecElemU64)src1[lane]
24439 + (VecElemU64)bits(vcc.rawData(), lane))
                    >= 0x100000000ULL ? 1 : 0);
24441 }
24442 }
24443
24444 vdst.write();
24445 sdst.write();
24446 }
24447
24448 Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC *iFmt)
24449 : Inst_VOP3_SDST_ENC(iFmt, "v_subb_u32")
24450 {
24451 setFlag(ALU);
24452 setFlag(WritesVCC);
24453 setFlag(ReadsVCC);
24454 } // Inst_VOP3__V_SUBB_U32
24455
24456 Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
24457 {
24458 } // ~Inst_VOP3__V_SUBB_U32
24459
24460 // D.u = S0.u - S1.u - VCC[threadId];
24461 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
24462 // overflow.
24463 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
24464 // source comes from the SGPR-pair at S2.u.
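    // Borrow chains mirror the carry chains above: v_sub_u32 on the low
    // words writes the borrow bit that v_subb_u32 consumes through SRC2
    // when subtracting the higher words (illustrative description).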
24465 void
24466 Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
24467 {
24468 Wavefront *wf = gpuDynInst->wavefront();
24469 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24470 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24471 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
24472 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
24473 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24474
24475 src0.readSrc();
24476 src1.readSrc();
24477 vcc.read();
24478
24479 /**
24480 * input modifiers are supported by FP operations only
24481 */
24482 assert(!(extData.NEG & 0x1));
24483 assert(!(extData.NEG & 0x2));
24484 assert(!(extData.NEG & 0x4));
24485
24486 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24487 if (wf->execMask(lane)) {
24488 vdst[lane] = src0[lane] - src1[lane]
24489 - bits(vcc.rawData(), lane);
24490 sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
24491 > src0[lane] ? 1 : 0);
24492 }
24493 }
24494
24495 vdst.write();
24496 sdst.write();
24497 }
24498
24499 Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
24500 InFmt_VOP3_SDST_ENC *iFmt)
24501 : Inst_VOP3_SDST_ENC(iFmt, "v_subbrev_u32")
24502 {
24503 setFlag(ALU);
24504 setFlag(WritesVCC);
24505 setFlag(ReadsVCC);
24506 } // Inst_VOP3__V_SUBBREV_U32
24507
24508 Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
24509 {
24510 } // ~Inst_VOP3__V_SUBBREV_U32
24511
24512 // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
    // overflow.
24515 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
24516 // source comes from the SGPR-pair at S2.u.
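    // Relative to v_subb only the operand order is reversed, so the
    // borrow-out test compares S0.u + VCC[threadId] against the minuend
    // S1.u.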
24517 void
24518 Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
24519 {
24520 Wavefront *wf = gpuDynInst->wavefront();
24521 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24522 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
24525 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24526
24527 src0.readSrc();
24528 src1.readSrc();
24529 vcc.read();
24530
24531 /**
24532 * input modifiers are supported by FP operations only
24533 */
24534 assert(!(extData.NEG & 0x1));
24535 assert(!(extData.NEG & 0x2));
24536 assert(!(extData.NEG & 0x4));
24537
24538 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24539 if (wf->execMask(lane)) {
24540 vdst[lane] = src1[lane] - src0[lane]
24541 - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
                    > src1[lane] ? 1 : 0);
24544 }
24545 }
24546
24547 vdst.write();
24548 sdst.write();
24549 }
24550
24551 Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3 *iFmt)
24552 : Inst_VOP3(iFmt, "v_add_f16", false)
24553 {
24554 setFlag(ALU);
24555 setFlag(F16);
24556 } // Inst_VOP3__V_ADD_F16
24557
24558 Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16()
24559 {
24560 } // ~Inst_VOP3__V_ADD_F16
24561
24562 // D.f16 = S0.f16 + S1.f16.
24563 void
24564 Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
24565 {
24566 panicUnimplemented();
24567 }
24568
24569 Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3 *iFmt)
24570 : Inst_VOP3(iFmt, "v_sub_f16", false)
24571 {
24572 setFlag(ALU);
24573 setFlag(F16);
24574 } // Inst_VOP3__V_SUB_F16
24575
24576 Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16()
24577 {
24578 } // ~Inst_VOP3__V_SUB_F16
24579
24580 // D.f16 = S0.f16 - S1.f16.
24581 void
24582 Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
24583 {
24584 panicUnimplemented();
24585 }
24586
24587 Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3 *iFmt)
24588 : Inst_VOP3(iFmt, "v_subrev_f16", false)
24589 {
24590 setFlag(ALU);
24591 setFlag(F16);
24592 } // Inst_VOP3__V_SUBREV_F16
24593
24594 Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16()
24595 {
24596 } // ~Inst_VOP3__V_SUBREV_F16
24597
24598 // D.f16 = S1.f16 - S0.f16.
24599 void
24600 Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
24601 {
24602 panicUnimplemented();
24603 }
24604
24605 Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3 *iFmt)
24606 : Inst_VOP3(iFmt, "v_mul_f16", false)
24607 {
24608 setFlag(ALU);
24609 setFlag(F16);
24610 } // Inst_VOP3__V_MUL_F16
24611
24612 Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
24613 {
24614 } // ~Inst_VOP3__V_MUL_F16
24615
24616 // D.f16 = S0.f16 * S1.f16.
24617 void
24618 Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
24619 {
24620 panicUnimplemented();
24621 }
24622
24623 Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3 *iFmt)
24624 : Inst_VOP3(iFmt, "v_mac_f16", false)
24625 {
24626 setFlag(ALU);
24627 setFlag(F16);
24628 setFlag(MAC);
24629 } // Inst_VOP3__V_MAC_F16
24630
24631 Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
24632 {
24633 } // ~Inst_VOP3__V_MAC_F16
24634
24635 // D.f16 = S0.f16 * S1.f16 + D.f16.
24636 void
24637 Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
24638 {
24639 panicUnimplemented();
24640 }
24641
24642 Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3 *iFmt)
24643 : Inst_VOP3(iFmt, "v_add_u16", false)
24644 {
24645 setFlag(ALU);
24646 } // Inst_VOP3__V_ADD_U16
24647
24648 Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16()
24649 {
24650 } // ~Inst_VOP3__V_ADD_U16
24651
24652 // D.u16 = S0.u16 + S1.u16.
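    // The 16-bit result wraps modulo 2^16 with no carry-out:
    // e.g. 0xFFFF + 0x0001 = 0x0000 (illustrative).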
24653 void
24654 Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
24655 {
24656 Wavefront *wf = gpuDynInst->wavefront();
24657 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24658 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24659 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24660
24661 src0.readSrc();
24662 src1.readSrc();
24663
24664 /**
24665 * input modifiers are supported by FP operations only
24666 */
24667 assert(!(instData.ABS & 0x1));
24668 assert(!(instData.ABS & 0x2));
24669 assert(!(instData.ABS & 0x4));
24670 assert(!(extData.NEG & 0x1));
24671 assert(!(extData.NEG & 0x2));
24672 assert(!(extData.NEG & 0x4));
24673
24674 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24675 if (wf->execMask(lane)) {
24676 vdst[lane] = src0[lane] + src1[lane];
24677 }
24678 }
24679
24680 vdst.write();
24681 }
24682
24683 Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3 *iFmt)
24684 : Inst_VOP3(iFmt, "v_sub_u16", false)
24685 {
24686 setFlag(ALU);
24687 } // Inst_VOP3__V_SUB_U16
24688
24689 Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16()
24690 {
24691 } // ~Inst_VOP3__V_SUB_U16
24692
24693 // D.u16 = S0.u16 - S1.u16.
24694 void
24695 Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
24696 {
24697 Wavefront *wf = gpuDynInst->wavefront();
24698 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24699 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24700 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24701
24702 src0.readSrc();
24703 src1.readSrc();
24704
24705 /**
24706 * input modifiers are supported by FP operations only
24707 */
24708 assert(!(instData.ABS & 0x1));
24709 assert(!(instData.ABS & 0x2));
24710 assert(!(instData.ABS & 0x4));
24711 assert(!(extData.NEG & 0x1));
24712 assert(!(extData.NEG & 0x2));
24713 assert(!(extData.NEG & 0x4));
24714
24715 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24716 if (wf->execMask(lane)) {
24717 vdst[lane] = src0[lane] - src1[lane];
24718 }
24719 }
24720
24721 vdst.write();
24722 }
24723
24724 Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3 *iFmt)
24725 : Inst_VOP3(iFmt, "v_subrev_u16", false)
24726 {
24727 setFlag(ALU);
24728 } // Inst_VOP3__V_SUBREV_U16
24729
24730 Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
24731 {
24732 } // ~Inst_VOP3__V_SUBREV_U16
24733
24734 // D.u16 = S1.u16 - S0.u16.
24735 void
24736 Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
24737 {
24738 Wavefront *wf = gpuDynInst->wavefront();
24739 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24740 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24741 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24742
24743 src0.readSrc();
24744 src1.readSrc();
24745
24746 /**
24747 * input modifiers are supported by FP operations only
24748 */
24749 assert(!(instData.ABS & 0x1));
24750 assert(!(instData.ABS & 0x2));
24751 assert(!(instData.ABS & 0x4));
24752 assert(!(extData.NEG & 0x1));
24753 assert(!(extData.NEG & 0x2));
24754 assert(!(extData.NEG & 0x4));
24755
24756 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24757 if (wf->execMask(lane)) {
24758 vdst[lane] = src1[lane] - src0[lane];
24759 }
24760 }
24761
24762 vdst.write();
24763 }
24764
24765 Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3 *iFmt)
24766 : Inst_VOP3(iFmt, "v_mul_lo_u16", false)
24767 {
24768 setFlag(ALU);
24769 } // Inst_VOP3__V_MUL_LO_U16
24770
24771 Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
24772 {
24773 } // ~Inst_VOP3__V_MUL_LO_U16
24774
24775 // D.u16 = S0.u16 * S1.u16.
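    // Only the low 16 bits of the product survive: the uint16_t operands
    // promote to int for the multiply, and assigning back to the 16-bit
    // destination truncates, e.g. 0x0100 * 0x0300 = 0x30000 -> 0x0000
    // (illustrative; the intermediate product is evaluated at int width).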
24776 void
24777 Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
24778 {
24779 Wavefront *wf = gpuDynInst->wavefront();
24780 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24781 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24782 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24783
24784 src0.readSrc();
24785 src1.readSrc();
24786
24787 /**
24788 * input modifiers are supported by FP operations only
24789 */
24790 assert(!(instData.ABS & 0x1));
24791 assert(!(instData.ABS & 0x2));
24792 assert(!(instData.ABS & 0x4));
24793 assert(!(extData.NEG & 0x1));
24794 assert(!(extData.NEG & 0x2));
24795 assert(!(extData.NEG & 0x4));
24796
24797 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24798 if (wf->execMask(lane)) {
24799 vdst[lane] = src0[lane] * src1[lane];
24800 }
24801 }
24802
24803 vdst.write();
24804 }
24805
24806 Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3 *iFmt)
24807 : Inst_VOP3(iFmt, "v_lshlrev_b16", false)
24808 {
24809 setFlag(ALU);
24810 } // Inst_VOP3__V_LSHLREV_B16
24811
24812 Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
24813 {
24814 } // ~Inst_VOP3__V_LSHLREV_B16
24815
24816 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
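    // The shift amount is masked to 4 bits (S0[3:0]) for the 16-bit
    // shifts, versus 5 bits (S0[4:0]) for the 32-bit variants above.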
24817 void
24818 Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
24819 {
24820 Wavefront *wf = gpuDynInst->wavefront();
24821 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24822 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24823 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24824
24825 src0.readSrc();
24826 src1.readSrc();
24827
24828 /**
24829 * input modifiers are supported by FP operations only
24830 */
24831 assert(!(instData.ABS & 0x1));
24832 assert(!(instData.ABS & 0x2));
24833 assert(!(instData.ABS & 0x4));
24834 assert(!(extData.NEG & 0x1));
24835 assert(!(extData.NEG & 0x2));
24836 assert(!(extData.NEG & 0x4));
24837
24838 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24839 if (wf->execMask(lane)) {
24840 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
24841 }
24842 }
24843
24844 vdst.write();
24845 }
24846
24847 Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3 *iFmt)
24848 : Inst_VOP3(iFmt, "v_lshrrev_b16", false)
24849 {
24850 setFlag(ALU);
24851 } // Inst_VOP3__V_LSHRREV_B16
24852
24853 Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
24854 {
24855 } // ~Inst_VOP3__V_LSHRREV_B16
24856
24857 // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
24858 // The vacated bits are set to zero.
24859 void
24860 Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
24861 {
24862 Wavefront *wf = gpuDynInst->wavefront();
24863 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24864 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24865 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24866
24867 src0.readSrc();
24868 src1.readSrc();
24869
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
24885
24886 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24887 if (wf->execMask(lane)) {
24888 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
24889 }
24890 }
24891
24892 vdst.write();
24893 }
24894
24895 Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3 *iFmt)
24896 : Inst_VOP3(iFmt, "v_ashrrev_i16", false)
24897 {
24898 setFlag(ALU);
24899 } // Inst_VOP3__V_ASHRREV_I16
24900
24901 Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
24902 {
24903 } // ~Inst_VOP3__V_ASHRREV_I16
24904
24905 // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
24906 // The vacated bits are set to the sign bit of the input value.
24907 void
24908 Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
24909 {
24910 Wavefront *wf = gpuDynInst->wavefront();
24911 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24912 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
24913 VecOperandI16 vdst(gpuDynInst, instData.VDST);
24914
24915 src0.readSrc();
24916 src1.readSrc();
24917
24918 /**
24919 * input modifiers are supported by FP operations only
24920 */
24921 assert(!(instData.ABS & 0x1));
24922 assert(!(instData.ABS & 0x2));
24923 assert(!(instData.ABS & 0x4));
24924 assert(!(extData.NEG & 0x1));
24925 assert(!(extData.NEG & 0x2));
24926 assert(!(extData.NEG & 0x4));
24927
24928 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24929 if (wf->execMask(lane)) {
24930 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
24931 }
24932 }
24933
24934 vdst.write();
24935 }
24936
24937 Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3 *iFmt)
24938 : Inst_VOP3(iFmt, "v_max_f16", false)
24939 {
24940 setFlag(ALU);
24941 setFlag(F16);
24942 } // Inst_VOP3__V_MAX_F16
24943
24944 Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
24945 {
24946 } // ~Inst_VOP3__V_MAX_F16
24947
24948 // D.f16 = max(S0.f16, S1.f16).
24949 void
24950 Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
24951 {
24952 panicUnimplemented();
24953 }
24954
24955 Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3 *iFmt)
24956 : Inst_VOP3(iFmt, "v_min_f16", false)
24957 {
24958 setFlag(ALU);
24959 setFlag(F16);
24960 } // Inst_VOP3__V_MIN_F16
24961
24962 Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
24963 {
24964 } // ~Inst_VOP3__V_MIN_F16
24965
24966 // D.f16 = min(S0.f16, S1.f16).
24967 void
24968 Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
24969 {
24970 panicUnimplemented();
24971 }
24972
24973 Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3 *iFmt)
24974 : Inst_VOP3(iFmt, "v_max_u16", false)
24975 {
24976 setFlag(ALU);
24977 } // Inst_VOP3__V_MAX_U16
24978
24979 Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
24980 {
24981 } // ~Inst_VOP3__V_MAX_U16
24982
24983 // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
24984 void
24985 Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
24986 {
24987 Wavefront *wf = gpuDynInst->wavefront();
24988 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24989 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24990 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24991
24992 src0.readSrc();
24993 src1.readSrc();
24994
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25010
25011 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25012 if (wf->execMask(lane)) {
25013 vdst[lane] = std::max(src0[lane], src1[lane]);
25014 }
25015 }
25016
25017 vdst.write();
25018 }
25019
25020 Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3 *iFmt)
25021 : Inst_VOP3(iFmt, "v_max_i16", false)
25022 {
25023 setFlag(ALU);
25024 } // Inst_VOP3__V_MAX_I16
25025
25026 Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
25027 {
25028 } // ~Inst_VOP3__V_MAX_I16
25029
25030 // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
25031 void
25032 Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
25033 {
25034 Wavefront *wf = gpuDynInst->wavefront();
25035 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
25036 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
25037 VecOperandI16 vdst(gpuDynInst, instData.VDST);
25038
25039 src0.readSrc();
25040 src1.readSrc();
25041
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25057
25058 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25059 if (wf->execMask(lane)) {
25060 vdst[lane] = std::max(src0[lane], src1[lane]);
25061 }
25062 }
25063
25064 vdst.write();
25065 }
25066
25067 Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3 *iFmt)
25068 : Inst_VOP3(iFmt, "v_min_u16", false)
25069 {
25070 setFlag(ALU);
25071 } // Inst_VOP3__V_MIN_U16
25072
25073 Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
25074 {
25075 } // ~Inst_VOP3__V_MIN_U16
25076
25077 // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
25078 void
25079 Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
25080 {
25081 Wavefront *wf = gpuDynInst->wavefront();
25082 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
25083 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
25084 VecOperandU16 vdst(gpuDynInst, instData.VDST);
25085
25086 src0.readSrc();
25087 src1.readSrc();
25088
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25104
25105 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25106 if (wf->execMask(lane)) {
25107 vdst[lane] = std::min(src0[lane], src1[lane]);
25108 }
25109 }
25110
25111 vdst.write();
25112 }
25113
25114 Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3 *iFmt)
25115 : Inst_VOP3(iFmt, "v_min_i16", false)
25116 {
25117 setFlag(ALU);
25118 } // Inst_VOP3__V_MIN_I16
25119
25120 Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
25121 {
25122 } // ~Inst_VOP3__V_MIN_I16
25123
25124 // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
25125 void
25126 Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
25127 {
25128 Wavefront *wf = gpuDynInst->wavefront();
25129 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
25130 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
25131 VecOperandI16 vdst(gpuDynInst, instData.VDST);
25132
25133 src0.readSrc();
25134 src1.readSrc();
25135
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25151
25152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25153 if (wf->execMask(lane)) {
25154 vdst[lane] = std::min(src0[lane], src1[lane]);
25155 }
25156 }
25157
25158 vdst.write();
25159 }
25160
25161 Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3 *iFmt)
25162 : Inst_VOP3(iFmt, "v_ldexp_f16", false)
25163 {
25164 setFlag(ALU);
25165 setFlag(F16);
25166 } // Inst_VOP3__V_LDEXP_F16
25167
25168 Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
25169 {
25170 } // ~Inst_VOP3__V_LDEXP_F16
25171
25172 // D.f16 = S0.f16 * (2 ** S1.i16).
25173 void
25174 Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
25175 {
25176 panicUnimplemented();
25177 }
25178
25179 Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3 *iFmt)
25180 : Inst_VOP3(iFmt, "v_nop", false)
25181 {
25182 setFlag(Nop);
25183 setFlag(ALU);
25184 } // Inst_VOP3__V_NOP
25185
25186 Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
25187 {
25188 } // ~Inst_VOP3__V_NOP
25189
25190 // Do nothing.
25191 void
25192 Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
25193 {
25194 }
25195
25196 Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3 *iFmt)
25197 : Inst_VOP3(iFmt, "v_mov_b32", false)
25198 {
25199 setFlag(ALU);
25200 } // Inst_VOP3__V_MOV_B32
25201
25202 Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
25203 {
25204 } // ~Inst_VOP3__V_MOV_B32
25205
25206 // D.u = S0.u.
25207 // Input and output modifiers not supported; this is an untyped operation.
25208 void
25209 Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
25210 {
25211 Wavefront *wf = gpuDynInst->wavefront();
25212 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25213 VecOperandU32 vdst(gpuDynInst, instData.VDST);
25214
25215 src.readSrc();
25216
25217 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25218 if (wf->execMask(lane)) {
25219 vdst[lane] = src[lane];
25220 }
25221 }
25222
25223 vdst.write();
25224 }
25225
25226 Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3 *iFmt)
25227 : Inst_VOP3(iFmt, "v_cvt_i32_f64", false)
25228 {
25229 setFlag(ALU);
25230 setFlag(F64);
25231 } // Inst_VOP3__V_CVT_I32_F64
25232
25233 Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64()
25234 {
25235 } // ~Inst_VOP3__V_CVT_I32_F64
25236
25237 // D.i = (int)S0.d.
25238 // Out-of-range floating point values (including infinity) saturate. NaN
25239 // is converted to 0.
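    // The saturation test below relies on std::frexp returning exp such
    // that |S0.d| lies in [2^(exp-1), 2^exp); exp > 31 therefore means
    // |S0.d| >= 2^31, i.e. the value cannot be represented in a signed
    // 32-bit integer and must clamp to INT_MIN/INT_MAX.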
25240 void
25241 Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
25242 {
25243 Wavefront *wf = gpuDynInst->wavefront();
25244 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
25245 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25246
25247 src.readSrc();
25248
25249 if (instData.ABS & 0x1) {
25250 src.absModifier();
25251 }
25252
25253 if (extData.NEG & 0x1) {
25254 src.negModifier();
25255 }
25256
25257 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25258 if (wf->execMask(lane)) {
25259 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 31) {
25264 if (std::signbit(src[lane])) {
25265 vdst[lane] = INT_MIN;
25266 } else {
25267 vdst[lane] = INT_MAX;
25268 }
25269 } else {
25270 vdst[lane] = (VecElemI32)src[lane];
25271 }
25272 }
25273 }
25274
25275 vdst.write();
25276 }
25277
25278 Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3 *iFmt)
25279 : Inst_VOP3(iFmt, "v_cvt_f64_i32", false)
25280 {
25281 setFlag(ALU);
25282 setFlag(F64);
25283 } // Inst_VOP3__V_CVT_F64_I32
25284
25285 Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32()
25286 {
25287 } // ~Inst_VOP3__V_CVT_F64_I32
25288
25289 // D.d = (double)S0.i.
25290 void
25291 Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
25292 {
25293 Wavefront *wf = gpuDynInst->wavefront();
25294 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
25295 VecOperandF64 vdst(gpuDynInst, instData.VDST);
25296
25297 src.readSrc();
25298
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25306
25307 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25308 if (wf->execMask(lane)) {
25309 vdst[lane] = (VecElemF64)src[lane];
25310 }
25311 }
25312
25313 vdst.write();
25314 }
25315
25316 Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3 *iFmt)
25317 : Inst_VOP3(iFmt, "v_cvt_f32_i32", false)
25318 {
25319 setFlag(ALU);
25320 setFlag(F32);
25321 } // Inst_VOP3__V_CVT_F32_I32
25322
25323 Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32()
25324 {
25325 } // ~Inst_VOP3__V_CVT_F32_I32
25326
25327 // D.f = (float)S0.i.
25328 void
25329 Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
25330 {
25331 Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
25333 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25334
25335 src.readSrc();
25336
25337 /**
25338 * input modifiers are supported by FP operations only
25339 */
25340 assert(!(instData.ABS & 0x1));
25341 assert(!(instData.ABS & 0x2));
25342 assert(!(instData.ABS & 0x4));
25343 assert(!(extData.NEG & 0x1));
25344 assert(!(extData.NEG & 0x2));
25345 assert(!(extData.NEG & 0x4));
25346
25347 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25348 if (wf->execMask(lane)) {
25349 vdst[lane] = (VecElemF32)src[lane];
25350 }
25351 }
25352
25353 vdst.write();
25354 }
25355
25356 Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3 *iFmt)
25357 : Inst_VOP3(iFmt, "v_cvt_f32_u32", false)
25358 {
25359 setFlag(ALU);
25360 setFlag(F32);
25361 } // Inst_VOP3__V_CVT_F32_U32
25362
25363 Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32()
25364 {
25365 } // ~Inst_VOP3__V_CVT_F32_U32
25366
25367 // D.f = (float)S0.u.
25368 void
25369 Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
25370 {
25371 Wavefront *wf = gpuDynInst->wavefront();
25372 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25373 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25374
25375 src.readSrc();
25376
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25384
25385 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25386 if (wf->execMask(lane)) {
25387 vdst[lane] = (VecElemF32)src[lane];
25388 }
25389 }
25390
25391 vdst.write();
25392 }
25393
25394 Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3 *iFmt)
25395 : Inst_VOP3(iFmt, "v_cvt_u32_f32", false)
25396 {
25397 setFlag(ALU);
25398 setFlag(F32);
25399 } // Inst_VOP3__V_CVT_U32_F32
25400
25401 Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32()
25402 {
25403 } // ~Inst_VOP3__V_CVT_U32_F32
25404
25405 // D.u = (unsigned)S0.f.
25406 // Out-of-range floating point values (including infinity) saturate. NaN
25407 // is converted to 0.
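    // Here exp > 32 from std::frexp means S0.f >= 2^32 (beyond UINT_MAX),
    // and any negative finite input clamps to 0 per the saturation rule.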
25408 void
25409 Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
25410 {
25411 Wavefront *wf = gpuDynInst->wavefront();
25412 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25413 VecOperandU32 vdst(gpuDynInst, instData.VDST);
25414
25415 src.readSrc();
25416
25417 if (instData.ABS & 0x1) {
25418 src.absModifier();
25419 }
25420
25421 if (extData.NEG & 0x1) {
25422 src.negModifier();
25423 }
25424
25425 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25426 if (wf->execMask(lane)) {
25427 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 32) {
                    vdst[lane] = UINT_MAX;
                } else if (src[lane] < 0) {
                    vdst[lane] = 0;
                } else {
25440 vdst[lane] = (VecElemU32)src[lane];
25441 }
25442 }
25443 }
25444
25445 vdst.write();
25446 }
25447
25448 Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3 *iFmt)
25449 : Inst_VOP3(iFmt, "v_cvt_i32_f32", false)
25450 {
25451 setFlag(ALU);
25452 setFlag(F32);
25453 } // Inst_VOP3__V_CVT_I32_F32
25454
25455 Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
25456 {
25457 } // ~Inst_VOP3__V_CVT_I32_F32
25458
25459 // D.i = (int)S0.f.
25460 // Out-of-range floating point values (including infinity) saturate. NaN
25461 // is converted to 0.
25462 void
25463 Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
25464 {
25465 Wavefront *wf = gpuDynInst->wavefront();
25466 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25467 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25468
25469 src.readSrc();
25470
25471 if (instData.ABS & 0x1) {
25472 src.absModifier();
25473 }
25474
25475 if (extData.NEG & 0x1) {
25476 src.negModifier();
25477 }
25478
25479 /**
25480 * input modifiers are supported by FP operations only
25481 */
25482 assert(!(instData.ABS & 0x2));
25483 assert(!(instData.ABS & 0x4));
25484 assert(!(extData.NEG & 0x2));
25485 assert(!(extData.NEG & 0x4));
25486
25487 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25488 if (wf->execMask(lane)) {
25489 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 31) {
25494 if (std::signbit(src[lane])) {
25495 vdst[lane] = INT_MIN;
25496 } else {
25497 vdst[lane] = INT_MAX;
25498 }
25499 } else {
25500 vdst[lane] = (VecElemI32)src[lane];
25501 }
25502 }
25503 }
25504
25505 vdst.write();
25506 }
25507
25508 Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3 *iFmt)
25509 : Inst_VOP3(iFmt, "v_mov_fed_b32", false)
25510 {
25511 setFlag(ALU);
25512 } // Inst_VOP3__V_MOV_FED_B32
25513
25514 Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
25515 {
25516 } // ~Inst_VOP3__V_MOV_FED_B32
25517
25518 // D.u = S0.u;
25519 // Input and output modifiers not supported; this is an untyped operation.
25520 void
25521 Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
25522 {
25523 panicUnimplemented();
25524 }
25525
25526 Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3 *iFmt)
25527 : Inst_VOP3(iFmt, "v_cvt_f16_f32", false)
25528 {
25529 setFlag(ALU);
25530 setFlag(F32);
25531 } // Inst_VOP3__V_CVT_F16_F32
25532
25533 Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
25534 {
25535 } // ~Inst_VOP3__V_CVT_F16_F32
25536
25537 // D.f16 = flt32_to_flt16(S0.f).
25538 void
25539 Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
25540 {
25541 panicUnimplemented();
25542 }
25543
25544 Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3 *iFmt)
25545 : Inst_VOP3(iFmt, "v_cvt_f32_f16", false)
25546 {
25547 setFlag(ALU);
25548 setFlag(F32);
25549 } // Inst_VOP3__V_CVT_F32_F16
25550
25551 Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
25552 {
25553 } // ~Inst_VOP3__V_CVT_F32_F16
25554
25555 // D.f = flt16_to_flt32(S0.f16).
25556 void
25557 Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
25558 {
25559 panicUnimplemented();
25560 }
25561
25562 Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
25563 InFmt_VOP3 *iFmt)
25564 : Inst_VOP3(iFmt, "v_cvt_rpi_i32_f32", false)
25565 {
25566 setFlag(ALU);
25567 setFlag(F32);
25568 } // Inst_VOP3__V_CVT_RPI_I32_F32
25569
25570 Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
25571 {
25572 } // ~Inst_VOP3__V_CVT_RPI_I32_F32
25573
25574 // D.i = (int)floor(S0.f + 0.5).
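    // floor(S0.f + 0.5) rounds to nearest with ties toward +infinity:
    // 0.5 -> 1 and -0.5 -> 0 (illustrative).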
25575 void
25576 Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
25577 {
25578 Wavefront *wf = gpuDynInst->wavefront();
25579 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25580 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25581
25582 src.readSrc();
25583
25584 if (instData.ABS & 0x1) {
25585 src.absModifier();
25586 }
25587
25588 if (extData.NEG & 0x1) {
25589 src.negModifier();
25590 }
25591
25592 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25593 if (wf->execMask(lane)) {
25594 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
25595 }
25596 }
25597
25598 vdst.write();
25599 }
25600
25601 Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
25602 InFmt_VOP3 *iFmt)
25603 : Inst_VOP3(iFmt, "v_cvt_flr_i32_f32", false)
25604 {
25605 setFlag(ALU);
25606 setFlag(F32);
25607 } // Inst_VOP3__V_CVT_FLR_I32_F32
25608
25609 Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
25610 {
25611 } // ~Inst_VOP3__V_CVT_FLR_I32_F32
25612
25613 // D.i = (int)floor(S0.f).
25614 void
25615 Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
25616 {
25617 Wavefront *wf = gpuDynInst->wavefront();
25618 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25619 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25620
25621 src.readSrc();
25622
25623 if (instData.ABS & 0x1) {
25624 src.absModifier();
25625 }
25626
25627 if (extData.NEG & 0x1) {
25628 src.negModifier();
25629 }
25630
25631 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25632 if (wf->execMask(lane)) {
25633 vdst[lane] = (VecElemI32)std::floor(src[lane]);
25634 }
25635 }
25636
25637 vdst.write();
25638 }
25639
25640 Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3 *iFmt)
25641 : Inst_VOP3(iFmt, "v_cvt_off_f32_i4", false)
25642 {
25643 setFlag(ALU);
25644 setFlag(F32);
25645 } // Inst_VOP3__V_CVT_OFF_F32_I4
25646
25647 Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
25648 {
25649 } // ~Inst_VOP3__V_CVT_OFF_F32_I4
25650
25651 // 4-bit signed int to 32-bit float.
25652 void
25653 Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
25654 {
25655 panicUnimplemented();
25656 }
25657
25658 Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3 *iFmt)
25659 : Inst_VOP3(iFmt, "v_cvt_f32_f64", false)
25660 {
25661 setFlag(ALU);
25662 setFlag(F64);
25663 } // Inst_VOP3__V_CVT_F32_F64
25664
25665 Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
25666 {
25667 } // ~Inst_VOP3__V_CVT_F32_F64
25668
25669 // D.f = (float)S0.d.
25670 void
25671 Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
25672 {
25673 Wavefront *wf = gpuDynInst->wavefront();
25674 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
25675 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25676
25677 src.readSrc();
25678
25679 if (instData.ABS & 0x1) {
25680 src.absModifier();
25681 }
25682
25683 if (extData.NEG & 0x1) {
25684 src.negModifier();
25685 }
25686
25687 /**
25688 * input modifiers are supported by FP operations only
25689 */
25690 assert(!(instData.ABS & 0x2));
25691 assert(!(instData.ABS & 0x4));
25692 assert(!(extData.NEG & 0x2));
25693 assert(!(extData.NEG & 0x4));
25694
25695 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25696 if (wf->execMask(lane)) {
25697 vdst[lane] = (VecElemF32)src[lane];
25698 }
25699 }
25700
25701 vdst.write();
25702 }
25703
25704 Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3 *iFmt)
25705 : Inst_VOP3(iFmt, "v_cvt_f64_f32", false)
25706 {
25707 setFlag(ALU);
25708 setFlag(F64);
25709 } // Inst_VOP3__V_CVT_F64_F32
25710
25711 Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
25712 {
25713 } // ~Inst_VOP3__V_CVT_F64_F32
25714
25715 // D.d = (double)S0.f.
25716 void
25717 Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
25718 {
25719 Wavefront *wf = gpuDynInst->wavefront();
25720 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25721 VecOperandF64 vdst(gpuDynInst, instData.VDST);
25722
25723 src.readSrc();
25724
25725 if (instData.ABS & 0x1) {
25726 src.absModifier();
25727 }
25728
25729 if (extData.NEG & 0x1) {
25730 src.negModifier();
25731 }
25732
25733 /**
25734 * input modifiers are supported by FP operations only
25735 */
25736 assert(!(instData.ABS & 0x2));
25737 assert(!(instData.ABS & 0x4));
25738 assert(!(extData.NEG & 0x2));
25739 assert(!(extData.NEG & 0x4));
25740
25741 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25742 if (wf->execMask(lane)) {
25743 vdst[lane] = (VecElemF64)src[lane];
25744 }
25745 }
25746
25747 vdst.write();
25748 }
25749
25750 Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3 *iFmt)
25751 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte0", false)
25752 {
25753 setFlag(ALU);
25754 setFlag(F32);
25755 } // Inst_VOP3__V_CVT_F32_UBYTE0
25756
25757 Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0()
25758 {
25759 } // ~Inst_VOP3__V_CVT_F32_UBYTE0
25760
25761 // D.f = (float)(S0.u[7:0]).
25762 void
25763 Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
25764 {
25765 Wavefront *wf = gpuDynInst->wavefront();
25766 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25767 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25768
25769 src.readSrc();
25770
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25778
25779 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25780 if (wf->execMask(lane)) {
25781 vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
25782 }
25783 }
25784
25785 vdst.write();
25786 }
25787
25788 Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3 *iFmt)
25789 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte1", false)
25790 {
25791 setFlag(ALU);
25792 setFlag(F32);
25793 } // Inst_VOP3__V_CVT_F32_UBYTE1
25794
25795 Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1()
25796 {
25797 } // ~Inst_VOP3__V_CVT_F32_UBYTE1
25798
25799 // D.f = (float)(S0.u[15:8]).
25800 void
25801 Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
25802 {
25803 Wavefront *wf = gpuDynInst->wavefront();
25804 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25805 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25806
25807 src.readSrc();
25808
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25816
25817 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25818 if (wf->execMask(lane)) {
25819 vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
25820 }
25821 }
25822
25823 vdst.write();
25824 }
25825
25826 Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3 *iFmt)
25827 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte2", false)
25828 {
25829 setFlag(ALU);
25830 setFlag(F32);
25831 } // Inst_VOP3__V_CVT_F32_UBYTE2
25832
25833 Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2()
25834 {
25835 } // ~Inst_VOP3__V_CVT_F32_UBYTE2
25836
25837 // D.f = (float)(S0.u[23:16]).
25838 void
25839 Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
25840 {
25841 Wavefront *wf = gpuDynInst->wavefront();
25842 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25843 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25844
25845 src.readSrc();
25846
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25854
25855 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25856 if (wf->execMask(lane)) {
25857 vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
25858 }
25859 }
25860
25861 vdst.write();
25862 }
25863
25864 Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3 *iFmt)
25865 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte3", false)
25866 {
25867 setFlag(ALU);
25868 setFlag(F32);
25869 } // Inst_VOP3__V_CVT_F32_UBYTE3
25870
25871 Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3()
25872 {
25873 } // ~Inst_VOP3__V_CVT_F32_UBYTE3
25874
25875 // D.f = (float)(S0.u[31:24]).
25876 void
25877 Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
25878 {
25879 Wavefront *wf = gpuDynInst->wavefront();
25880 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25881 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25882
25883 src.readSrc();
25884
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25892
25893 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25894 if (wf->execMask(lane)) {
25895 vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
25896 }
25897 }
25898
25899 vdst.write();
25900 }
25901
25902 Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3 *iFmt)
25903 : Inst_VOP3(iFmt, "v_cvt_u32_f64", false)
25904 {
25905 setFlag(ALU);
25906 setFlag(F64);
25907 } // Inst_VOP3__V_CVT_U32_F64
25908
25909 Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64()
25910 {
25911 } // ~Inst_VOP3__V_CVT_U32_F64
25912
25913 // D.u = (unsigned)S0.d.
25914 // Out-of-range floating point values (including infinity) saturate. NaN
25915 // is converted to 0.
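    // As in V_CVT_U32_F32 above: exp > 32 from std::frexp flags values
    // at or beyond 2^32, and negative finite inputs clamp to 0.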
25916 void
25917 Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
25918 {
25919 Wavefront *wf = gpuDynInst->wavefront();
25920 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
25921 VecOperandU32 vdst(gpuDynInst, instData.VDST);
25922
25923 src.readSrc();
25924
25925 if (instData.ABS & 0x1) {
25926 src.absModifier();
25927 }
25928
25929 if (extData.NEG & 0x1) {
25930 src.negModifier();
25931 }
25932
25933 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25934 if (wf->execMask(lane)) {
25935 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 32) {
                    vdst[lane] = UINT_MAX;
                } else if (src[lane] < 0) {
                    vdst[lane] = 0;
                } else {
25948 vdst[lane] = (VecElemU32)src[lane];
25949 }
25950 }
25951 }
25952
25953 vdst.write();
25954 }
25955
25956 Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3 *iFmt)
25957 : Inst_VOP3(iFmt, "v_cvt_f64_u32", false)
25958 {
25959 setFlag(ALU);
25960 setFlag(F64);
25961 } // Inst_VOP3__V_CVT_F64_U32
25962
25963 Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32()
25964 {
25965 } // ~Inst_VOP3__V_CVT_F64_U32
25966
25967 // D.d = (double)S0.u.
25968 void
25969 Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
25970 {
25971 Wavefront *wf = gpuDynInst->wavefront();
25972 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25973 VecOperandF64 vdst(gpuDynInst, instData.VDST);
25974
25975 src.readSrc();
25976
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25984
25985 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25986 if (wf->execMask(lane)) {
25987 vdst[lane] = (VecElemF64)src[lane];
25988 }
25989 }
25990
25991 vdst.write();
25992 }
25993
25994 Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3 *iFmt)
25995 : Inst_VOP3(iFmt, "v_trunc_f64", false)
25996 {
25997 setFlag(ALU);
25998 setFlag(F64);
25999 } // Inst_VOP3__V_TRUNC_F64
26000
26001 Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64()
26002 {
26003 } // ~Inst_VOP3__V_TRUNC_F64
26004
26005 // D.d = trunc(S0.d), return integer part of S0.d.
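    // std::trunc rounds toward zero, unlike floor: trunc(-1.7) = -1.0
    // while floor(-1.7) = -2.0 (illustrative).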
26006 void
26007 Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
26008 {
26009 Wavefront *wf = gpuDynInst->wavefront();
26010 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26011 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26012
26013 src.readSrc();
26014
26015 if (instData.ABS & 0x1) {
26016 src.absModifier();
26017 }
26018
26019 if (extData.NEG & 0x1) {
26020 src.negModifier();
26021 }
26022
26023 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26024 if (wf->execMask(lane)) {
26025 vdst[lane] = std::trunc(src[lane]);
26026 }
26027 }
26028
26029 vdst.write();
26030 }
26031
26032 Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3 *iFmt)
26033 : Inst_VOP3(iFmt, "v_ceil_f64", false)
26034 {
26035 setFlag(ALU);
26036 setFlag(F64);
26037 } // Inst_VOP3__V_CEIL_F64
26038
26039 Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64()
26040 {
26041 } // ~Inst_VOP3__V_CEIL_F64
26042
26043 // D.d = ceil(S0.d);
26044 void
26045 Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
26046 {
26047 Wavefront *wf = gpuDynInst->wavefront();
26048 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26049 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26050
26051 src.readSrc();
26052
26053 if (instData.ABS & 0x1) {
26054 src.absModifier();
26055 }
26056
26057 if (extData.NEG & 0x1) {
26058 src.negModifier();
26059 }
26060
26061 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26062 if (wf->execMask(lane)) {
26063 vdst[lane] = std::ceil(src[lane]);
26064 }
26065 }
26066
26067 vdst.write();
26068 }
26069
26070 Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3 *iFmt)
26071 : Inst_VOP3(iFmt, "v_rndne_f64", false)
26072 {
26073 setFlag(ALU);
26074 setFlag(F64);
26075 } // Inst_VOP3__V_RNDNE_F64
26076
26077 Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64()
26078 {
26079 } // ~Inst_VOP3__V_RNDNE_F64
26080
26081 // D.d = round_nearest_even(S0.d).
26082 void
26083 Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
26084 {
26085 Wavefront *wf = gpuDynInst->wavefront();
26086 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26087 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26088
26089 src.readSrc();
26090
26091 if (instData.ABS & 0x1) {
26092 src.absModifier();
26093 }
26094
26095 if (extData.NEG & 0x1) {
26096 src.negModifier();
26097 }
26098
26099 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26100 if (wf->execMask(lane)) {
26101 vdst[lane] = roundNearestEven(src[lane]);
26102 }
26103 }
26104
26105 vdst.write();
26106 }
26107
26108 Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3 *iFmt)
26109 : Inst_VOP3(iFmt, "v_floor_f64", false)
26110 {
26111 setFlag(ALU);
26112 setFlag(F64);
26113 } // Inst_VOP3__V_FLOOR_F64
26114
26115 Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64()
26116 {
26117 } // ~Inst_VOP3__V_FLOOR_F64
26118
26119 // D.d = floor(S0.d);
26120 void
26121 Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
26122 {
26123 Wavefront *wf = gpuDynInst->wavefront();
26124 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26125 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26126
26127 src.readSrc();
26128
26129 if (instData.ABS & 0x1) {
26130 src.absModifier();
26131 }
26132
26133 if (extData.NEG & 0x1) {
26134 src.negModifier();
26135 }
26136
26137 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26138 if (wf->execMask(lane)) {
26139 vdst[lane] = std::floor(src[lane]);
26140 }
26141 }
26142
26143 vdst.write();
26144 }
26145
26146 Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3 *iFmt)
26147 : Inst_VOP3(iFmt, "v_fract_f32", false)
26148 {
26149 setFlag(ALU);
26150 setFlag(F32);
26151 } // Inst_VOP3__V_FRACT_F32
26152
26153 Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32()
26154 {
26155 } // ~Inst_VOP3__V_FRACT_F32
26156
26157 // D.f = modf(S0.f).
26158 void
26159 Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
26160 {
26161 Wavefront *wf = gpuDynInst->wavefront();
26162 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26163 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26164
26165 src.readSrc();
26166
26167 if (instData.ABS & 0x1) {
26168 src.absModifier();
26169 }
26170
26171 if (extData.NEG & 0x1) {
26172 src.negModifier();
26173 }
26174
26175 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26176 if (wf->execMask(lane)) {
26177 VecElemF32 int_part(0.0);
26178 vdst[lane] = std::modf(src[lane], &int_part);
26179 }
26180 }
26181
26182 vdst.write();
26183 }
26184
26185 Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3 *iFmt)
26186 : Inst_VOP3(iFmt, "v_trunc_f32", false)
26187 {
26188 setFlag(ALU);
26189 setFlag(F32);
26190 } // Inst_VOP3__V_TRUNC_F32
26191
26192 Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32()
26193 {
26194 } // ~Inst_VOP3__V_TRUNC_F32
26195
26196 // D.f = trunc(S0.f), return integer part of S0.f.
26197 void
26198 Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
26199 {
26200 Wavefront *wf = gpuDynInst->wavefront();
26201 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26202 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26203
26204 src.readSrc();
26205
26206 if (instData.ABS & 0x1) {
26207 src.absModifier();
26208 }
26209
26210 if (extData.NEG & 0x1) {
26211 src.negModifier();
26212 }
26213
26214 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26215 if (wf->execMask(lane)) {
26216 vdst[lane] = std::trunc(src[lane]);
26217 }
26218 }
26219
26220 vdst.write();
26221 }
26222
26223 Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3 *iFmt)
26224 : Inst_VOP3(iFmt, "v_ceil_f32", false)
26225 {
26226 setFlag(ALU);
26227 setFlag(F32);
26228 } // Inst_VOP3__V_CEIL_F32
26229
26230 Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32()
26231 {
26232 } // ~Inst_VOP3__V_CEIL_F32
26233
26234 // D.f = ceil(S0.f);
26235 void
26236 Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
26237 {
26238 Wavefront *wf = gpuDynInst->wavefront();
26239 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26240 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26241
26242 src.readSrc();
26243
26244 if (instData.ABS & 0x1) {
26245 src.absModifier();
26246 }
26247
26248 if (extData.NEG & 0x1) {
26249 src.negModifier();
26250 }
26251
26252 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26253 if (wf->execMask(lane)) {
26254 vdst[lane] = std::ceil(src[lane]);
26255 }
26256 }
26257
26258 vdst.write();
26259 }
26260
26261 Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3 *iFmt)
26262 : Inst_VOP3(iFmt, "v_rndne_f32", false)
26263 {
26264 setFlag(ALU);
26265 setFlag(F32);
26266 } // Inst_VOP3__V_RNDNE_F32
26267
26268 Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32()
26269 {
26270 } // ~Inst_VOP3__V_RNDNE_F32
26271
26272 // D.f = round_nearest_even(S0.f).
26273 void
26274 Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
26275 {
26276 Wavefront *wf = gpuDynInst->wavefront();
26277 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26278 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26279
26280 src.readSrc();
26281
26282 if (instData.ABS & 0x1) {
26283 src.absModifier();
26284 }
26285
26286 if (extData.NEG & 0x1) {
26287 src.negModifier();
26288 }
26289
26290 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26291 if (wf->execMask(lane)) {
26292 vdst[lane] = roundNearestEven(src[lane]);
26293 }
26294 }
26295
26296 vdst.write();
26297 }
26298
26299 Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3 *iFmt)
26300 : Inst_VOP3(iFmt, "v_floor_f32", false)
26301 {
26302 setFlag(ALU);
26303 setFlag(F32);
26304 } // Inst_VOP3__V_FLOOR_F32
26305
26306 Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32()
26307 {
26308 } // ~Inst_VOP3__V_FLOOR_F32
26309
26310 // D.f = floor(S0.f);
26311 void
26312 Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
26313 {
26314 Wavefront *wf = gpuDynInst->wavefront();
26315 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26316 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26317
26318 src.readSrc();
26319
26320 if (instData.ABS & 0x1) {
26321 src.absModifier();
26322 }
26323
26324 if (extData.NEG & 0x1) {
26325 src.negModifier();
26326 }
26327
26328 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26329 if (wf->execMask(lane)) {
26330 vdst[lane] = std::floor(src[lane]);
26331 }
26332 }
26333
26334 vdst.write();
26335 }
26336
26337 Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3 *iFmt)
26338 : Inst_VOP3(iFmt, "v_exp_f32", false)
26339 {
26340 setFlag(ALU);
26341 setFlag(F32);
26342 } // Inst_VOP3__V_EXP_F32
26343
26344 Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32()
26345 {
26346 } // ~Inst_VOP3__V_EXP_F32
26347
26348 // D.f = pow(2.0, S0.f).
26349 void
26350 Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
26351 {
26352 Wavefront *wf = gpuDynInst->wavefront();
26353 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26354 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26355
26356 src.readSrc();
26357
26358 if (instData.ABS & 0x1) {
26359 src.absModifier();
26360 }
26361
26362 if (extData.NEG & 0x1) {
26363 src.negModifier();
26364 }
26365
26366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26367 if (wf->execMask(lane)) {
26368 vdst[lane] = std::pow(2.0, src[lane]);
26369 }
26370 }
26371
26372 vdst.write();
26373 }
26374
26375 Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3 *iFmt)
26376 : Inst_VOP3(iFmt, "v_log_f32", false)
26377 {
26378 setFlag(ALU);
26379 setFlag(F32);
26380 } // Inst_VOP3__V_LOG_F32
26381
26382 Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32()
26383 {
26384 } // ~Inst_VOP3__V_LOG_F32
26385
26386 // D.f = log2(S0.f).
26387 void
26388 Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
26389 {
26390 Wavefront *wf = gpuDynInst->wavefront();
26391 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26392 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26393
26394 src.readSrc();
26395
26396 if (instData.ABS & 0x1) {
26397 src.absModifier();
26398 }
26399
26400 if (extData.NEG & 0x1) {
26401 src.negModifier();
26402 }
26403
26404 /**
26405 * input modifiers are supported by FP operations only
26406 */
26407 assert(!(instData.ABS & 0x2));
26408 assert(!(instData.ABS & 0x4));
26409 assert(!(extData.NEG & 0x2));
26410 assert(!(extData.NEG & 0x4));
26411
26412 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26413 if (wf->execMask(lane)) {
26414 vdst[lane] = std::log2(src[lane]);
26415 }
26416 }
26417
26418 vdst.write();
26419 }
26420
26421 Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3 *iFmt)
26422 : Inst_VOP3(iFmt, "v_rcp_f32", false)
26423 {
26424 setFlag(ALU);
26425 setFlag(F32);
26426 } // Inst_VOP3__V_RCP_F32
26427
26428 Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32()
26429 {
26430 } // ~Inst_VOP3__V_RCP_F32
26431
26432 // D.f = 1.0 / S0.f.
26433 void
26434 Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
26435 {
26436 Wavefront *wf = gpuDynInst->wavefront();
26437 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26438 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26439
26440 src.readSrc();
26441
26442 if (instData.ABS & 0x1) {
26443 src.absModifier();
26444 }
26445
26446 if (extData.NEG & 0x1) {
26447 src.negModifier();
26448 }
26449
26450 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26451 if (wf->execMask(lane)) {
26452 vdst[lane] = 1.0 / src[lane];
26453 }
26454 }
26455
26456 vdst.write();
26457 }
26458
26459 Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3 *iFmt)
26460 : Inst_VOP3(iFmt, "v_rcp_iflag_f32", false)
26461 {
26462 setFlag(ALU);
26463 setFlag(F32);
26464 } // Inst_VOP3__V_RCP_IFLAG_F32
26465
26466 Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32()
26467 {
26468 } // ~Inst_VOP3__V_RCP_IFLAG_F32
26469
26470 // D.f = 1.0 / S0.f.
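// The "iflag" variant is intended for integer-division macros: on
// hardware it reports divide-by-zero through an integer exception flag
// rather than raising floating-point exceptions. That flag is not
// modeled here, so the computation matches V_RCP_F32.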
26471 void
26472 Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
26473 {
26474 Wavefront *wf = gpuDynInst->wavefront();
26475 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26476 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26477
26478 src.readSrc();
26479
26480 if (instData.ABS & 0x1) {
26481 src.absModifier();
26482 }
26483
26484 if (extData.NEG & 0x1) {
26485 src.negModifier();
26486 }
26487
26488 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26489 if (wf->execMask(lane)) {
26490 vdst[lane] = 1.0 / src[lane];
26491 }
26492 }
26493
26494 vdst.write();
26495 }
26496
26497 Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3 *iFmt)
26498 : Inst_VOP3(iFmt, "v_rsq_f32", false)
26499 {
26500 setFlag(ALU);
26501 setFlag(F32);
26502 } // Inst_VOP3__V_RSQ_F32
26503
26504 Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32()
26505 {
26506 } // ~Inst_VOP3__V_RSQ_F32
26507
26508 // D.f = 1.0 / sqrt(S0.f).
26509 void
26510 Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
26511 {
26512 Wavefront *wf = gpuDynInst->wavefront();
26513 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26514 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26515
26516 src.readSrc();
26517
26518 if (instData.ABS & 0x1) {
26519 src.absModifier();
26520 }
26521
26522 if (extData.NEG & 0x1) {
26523 src.negModifier();
26524 }
26525
26526 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26527 if (wf->execMask(lane)) {
26528 vdst[lane] = 1.0 / std::sqrt(src[lane]);
26529 }
26530 }
26531
26532 vdst.write();
26533 }
26534
26535 Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3 *iFmt)
26536 : Inst_VOP3(iFmt, "v_rcp_f64", false)
26537 {
26538 setFlag(ALU);
26539 setFlag(F64);
26540 } // Inst_VOP3__V_RCP_F64
26541
26542 Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64()
26543 {
26544 } // ~Inst_VOP3__V_RCP_F64
26545
26546 // D.d = 1.0 / S0.d.
26547 void
26548 Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
26549 {
26550 Wavefront *wf = gpuDynInst->wavefront();
26551 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26552 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26553
26554 src.readSrc();
26555
26556 if (instData.ABS & 0x1) {
26557 src.absModifier();
26558 }
26559
26560 if (extData.NEG & 0x1) {
26561 src.negModifier();
26562 }
26563
26564 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26565 if (wf->execMask(lane)) {
26566 if (std::fpclassify(src[lane]) == FP_ZERO) {
26567 vdst[lane] = +INFINITY;
26568 } else if (std::isnan(src[lane])) {
26569 vdst[lane] = NAN;
26570 } else if (std::isinf(src[lane])) {
26571 if (std::signbit(src[lane])) {
26572 vdst[lane] = -0.0;
26573 } else {
26574 vdst[lane] = 0.0;
26575 }
26576 } else {
26577 vdst[lane] = 1.0 / src[lane];
26578 }
26579 }
26580 }
26581
26582 vdst.write();
26583 }
26584
26585 Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3 *iFmt)
26586 : Inst_VOP3(iFmt, "v_rsq_f64", false)
26587 {
26588 setFlag(ALU);
26589 setFlag(F64);
26590 } // Inst_VOP3__V_RSQ_F64
26591
26592 Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64()
26593 {
26594 } // ~Inst_VOP3__V_RSQ_F64
26595
26596 // D.d = 1.0 / sqrt(S0.d).
26597 void
26598 Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
26599 {
26600 Wavefront *wf = gpuDynInst->wavefront();
26601 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26602 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26603
26604 src.readSrc();
26605
26606 if (instData.ABS & 0x1) {
26607 src.absModifier();
26608 }
26609
26610 if (extData.NEG & 0x1) {
26611 src.negModifier();
26612 }
26613
26614 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26615 if (wf->execMask(lane)) {
26616 if (std::fpclassify(src[lane]) == FP_ZERO) {
26617 vdst[lane] = +INFINITY;
26618 } else if (std::isnan(src[lane])) {
26619 vdst[lane] = NAN;
26620 } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) {
26621 vdst[lane] = 0.0;
26622 } else if (std::signbit(src[lane])) {
26623 vdst[lane] = NAN;
26624 } else {
26625 vdst[lane] = 1.0 / std::sqrt(src[lane]);
26626 }
26627 }
26628 }
26629
26630 vdst.write();
26631 }
26632
26633 Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3 *iFmt)
26634 : Inst_VOP3(iFmt, "v_sqrt_f32", false)
26635 {
26636 setFlag(ALU);
26637 setFlag(F32);
26638 } // Inst_VOP3__V_SQRT_F32
26639
26640 Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32()
26641 {
26642 } // ~Inst_VOP3__V_SQRT_F32
26643
26644 // D.f = sqrt(S0.f).
26645 void
26646 Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
26647 {
26648 Wavefront *wf = gpuDynInst->wavefront();
26649 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26650 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26651
26652 src.readSrc();
26653
26654 if (instData.ABS & 0x1) {
26655 src.absModifier();
26656 }
26657
26658 if (extData.NEG & 0x1) {
26659 src.negModifier();
26660 }
26661
26662 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26663 if (wf->execMask(lane)) {
26664 vdst[lane] = std::sqrt(src[lane]);
26665 }
26666 }
26667
26668 vdst.write();
26669 }
26670
26671 Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3 *iFmt)
26672 : Inst_VOP3(iFmt, "v_sqrt_f64", false)
26673 {
26674 setFlag(ALU);
26675 setFlag(F64);
26676 } // Inst_VOP3__V_SQRT_F64
26677
26678 Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64()
26679 {
26680 } // ~Inst_VOP3__V_SQRT_F64
26681
26682 // D.d = sqrt(S0.d).
26683 void
26684 Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
26685 {
26686 Wavefront *wf = gpuDynInst->wavefront();
26687 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26688 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26689
26690 src.readSrc();
26691
26692 if (instData.ABS & 0x1) {
26693 src.absModifier();
26694 }
26695
26696 if (extData.NEG & 0x1) {
26697 src.negModifier();
26698 }
26699
26700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26701 if (wf->execMask(lane)) {
26702 vdst[lane] = std::sqrt(src[lane]);
26703 }
26704 }
26705
26706 vdst.write();
26707 }
26708
26709 Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3 *iFmt)
26710 : Inst_VOP3(iFmt, "v_sin_f32", false)
26711 {
26712 setFlag(ALU);
26713 setFlag(F32);
26714 } // Inst_VOP3__V_SIN_F32
26715
26716 Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32()
26717 {
26718 } // ~Inst_VOP3__V_SIN_F32
26719
26720 // D.f = sin(S0.f * 2 * PI).
26721 void
26722 Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
26723 {
26724 Wavefront *wf = gpuDynInst->wavefront();
26725 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26726 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
26727 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26728
26729 src.readSrc();
26730 pi.read();
26731
26732 if (instData.ABS & 0x1) {
26733 src.absModifier();
26734 }
26735
26736 if (extData.NEG & 0x1) {
26737 src.negModifier();
26738 }
26739
26740 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26741 if (wf->execMask(lane)) {
26742 vdst[lane] = std::sin(src[lane] * 2 * pi.rawData());
26743 }
26744 }
26745
26746 vdst.write();
26747 }
26748
26749 Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3 *iFmt)
26750 : Inst_VOP3(iFmt, "v_cos_f32", false)
26751 {
26752 setFlag(ALU);
26753 setFlag(F32);
26754 } // Inst_VOP3__V_COS_F32
26755
26756 Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32()
26757 {
26758 } // ~Inst_VOP3__V_COS_F32
26759
26760 // D.f = cos(S0.f * 2 * PI).
26761 void
26762 Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
26763 {
26764 Wavefront *wf = gpuDynInst->wavefront();
26765 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26766 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
26767 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26768
26769 src.readSrc();
26770 pi.read();
26771
26772 if (instData.ABS & 0x1) {
26773 src.absModifier();
26774 }
26775
26776 if (extData.NEG & 0x1) {
26777 src.negModifier();
26778 }
26779
26780 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26781 if (wf->execMask(lane)) {
26782 vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
26783 }
26784 }
26785
26786 vdst.write();
26787 }
26788
26789 Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3 *iFmt)
26790 : Inst_VOP3(iFmt, "v_not_b32", false)
26791 {
26792 setFlag(ALU);
26793 } // Inst_VOP3__V_NOT_B32
26794
26795 Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32()
26796 {
26797 } // ~Inst_VOP3__V_NOT_B32
26798
26799 // D.u = ~S0.u.
26800 // Input and output modifiers not supported.
26801 void
26802 Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
26803 {
26804 Wavefront *wf = gpuDynInst->wavefront();
26805 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26806 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26807
26808 src.readSrc();
26809
26810 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26811 if (wf->execMask(lane)) {
26812 vdst[lane] = ~src[lane];
26813 }
26814 }
26815
26816 vdst.write();
26817 }
26818
26819 Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3 *iFmt)
26820 : Inst_VOP3(iFmt, "v_bfrev_b32", false)
26821 {
26822 setFlag(ALU);
26823 } // Inst_VOP3__V_BFREV_B32
26824
26825 Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32()
26826 {
26827 } // ~Inst_VOP3__V_BFREV_B32
26828
26829 // D.u[31:0] = S0.u[0:31], bitfield reverse.
26830 // Input and output modifiers not supported.
26831 void
26832 Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
26833 {
26834 Wavefront *wf = gpuDynInst->wavefront();
26835 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26836 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26837
26838 src.readSrc();
26839
26840 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26841 if (wf->execMask(lane)) {
26842 vdst[lane] = reverseBits(src[lane]);
26843 }
26844 }
26845
26846 vdst.write();
26847 }
26848
26849 Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3 *iFmt)
26850 : Inst_VOP3(iFmt, "v_ffbh_u32", false)
26851 {
26852 setFlag(ALU);
26853 } // Inst_VOP3__V_FFBH_U32
26854
26855 Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32()
26856 {
26857 } // ~Inst_VOP3__V_FFBH_U32
26858
26859 // D.u = position of first 1 in S0.u from MSB;
26860 // D.u = 0xffffffff if S0.u == 0.
26861 void
26862 Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
26863 {
26864 Wavefront *wf = gpuDynInst->wavefront();
26865 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26866 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26867
26868 src.readSrc();
26869
26870 if (instData.ABS & 0x1) {
26871 src.absModifier();
26872 }
26873
26874 if (extData.NEG & 0x1) {
26875 src.negModifier();
26876 }
26877
26878 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26879 if (wf->execMask(lane)) {
26880 vdst[lane] = findFirstOneMsb(src[lane]);
26881 }
26882 }
26883
26884 vdst.write();
26885 }
26886
26887 Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3 *iFmt)
26888 : Inst_VOP3(iFmt, "v_ffbl_b32", false)
26889 {
26890 setFlag(ALU);
26891 } // Inst_VOP3__V_FFBL_B32
26892
26893 Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32()
26894 {
26895 } // ~Inst_VOP3__V_FFBL_B32
26896
26897 // D.u = position of first 1 in S0.u from LSB;
26898 // D.u = 0xffffffff if S0.u == 0.
26899 void
26900 Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
26901 {
26902 Wavefront *wf = gpuDynInst->wavefront();
26903 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26904 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26905
26906 src.readSrc();
26907
26908 if (instData.ABS & 0x1) {
26909 src.absModifier();
26910 }
26911
26912 if (extData.NEG & 0x1) {
26913 src.negModifier();
26914 }
26915
26916 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26917 if (wf->execMask(lane)) {
26918 vdst[lane] = findFirstOne(src[lane]);
26919 }
26920 }
26921
26922 vdst.write();
26923 }
26924
26925 Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3 *iFmt)
26926 : Inst_VOP3(iFmt, "v_ffbh_i32", false)
26927 {
26928 setFlag(ALU);
26929 } // Inst_VOP3__V_FFBH_I32
26930
26931 Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32()
26932 {
26933 } // ~Inst_VOP3__V_FFBH_I32
26934
26935 // D.u = position of first bit different from sign bit in S0.i from MSB;
26936 // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
26937 void
26938 Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
26939 {
26940 Wavefront *wf = gpuDynInst->wavefront();
26941 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
26942 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26943
26944 src.readSrc();
26945
26946 if (instData.ABS & 0x1) {
26947 src.absModifier();
26948 }
26949
26950 if (extData.NEG & 0x1) {
26951 src.negModifier();
26952 }
26953
26954 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26955 if (wf->execMask(lane)) {
26956 vdst[lane] = firstOppositeSignBit(src[lane]);
26957 }
26958 }
26959
26960 vdst.write();
26961 }
26962
26963 Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64(
26964 InFmt_VOP3 *iFmt)
26965 : Inst_VOP3(iFmt, "v_frexp_exp_i32_f64", false)
26966 {
26967 setFlag(ALU);
26968 setFlag(F64);
26969 } // Inst_VOP3__V_FREXP_EXP_I32_F64
26970
26971 Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64()
26972 {
26973 } // ~Inst_VOP3__V_FREXP_EXP_I32_F64
26974
26975 // See V_FREXP_EXP_I32_F32.
26976 void
26977 Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
26978 {
26979 Wavefront *wf = gpuDynInst->wavefront();
26980 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26981 VecOperandI32 vdst(gpuDynInst, instData.VDST);
26982
26983 src.readSrc();
26984
26985 if (instData.ABS & 0x1) {
26986 src.absModifier();
26987 }
26988
26989 if (extData.NEG & 0x1) {
26990 src.negModifier();
26991 }
26992
26993 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26994 if (wf->execMask(lane)) {
26995 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
26996 vdst[lane] = 0;
26997 } else {
26998 VecElemI32 exp(0);
26999 std::frexp(src[lane], &exp);
27000 vdst[lane] = exp;
27001 }
27002 }
27003 }
27004
27005 vdst.write();
27006 }
27007
27008 Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3 *iFmt)
27009 : Inst_VOP3(iFmt, "v_frexp_mant_f64", false)
27010 {
27011 setFlag(ALU);
27012 setFlag(F64);
27013 } // Inst_VOP3__V_FREXP_MANT_F64
27014
27015 Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64()
27016 {
27017 } // ~Inst_VOP3__V_FREXP_MANT_F64
27018
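// See V_FREXP_MANT_F32.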
27019 void
27020 Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
27021 {
27022 Wavefront *wf = gpuDynInst->wavefront();
27023 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
27024 VecOperandF64 vdst(gpuDynInst, instData.VDST);
27025
27026 src.readSrc();
27027
27028 if (instData.ABS & 0x1) {
27029 src.absModifier();
27030 }
27031
27032 if (extData.NEG & 0x1) {
27033 src.negModifier();
27034 }
27035
27036 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27037 if (wf->execMask(lane)) {
27038 VecElemI32 exp(0);
27039 vdst[lane] = std::frexp(src[lane], &exp);
27040 }
27041 }
27042
27043 vdst.write();
27044 }
27045
27046 Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3 *iFmt)
27047 : Inst_VOP3(iFmt, "v_fract_f64", false)
27048 {
27049 setFlag(ALU);
27050 setFlag(F64);
27051 } // Inst_VOP3__V_FRACT_F64
27052
27053 Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64()
27054 {
27055 } // ~Inst_VOP3__V_FRACT_F64
27056
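// D.d = modf(S0.d). See V_FRACT_F32.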
27057 void
27058 Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
27059 {
27060 Wavefront *wf = gpuDynInst->wavefront();
27061 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
27062 VecOperandF64 vdst(gpuDynInst, instData.VDST);
27063
27064 src.readSrc();
27065
27066 if (instData.ABS & 0x1) {
27067 src.absModifier();
27068 }
27069
27070 if (extData.NEG & 0x1) {
27071 src.negModifier();
27072 }
27073
27074 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27075 if (wf->execMask(lane)) {
27076 VecElemF64 int_part(0.0);
27077 vdst[lane] = std::modf(src[lane], &int_part);
27078 }
27079 }
27080
27081 vdst.write();
27082 }
27083
27084 Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32(
27085 InFmt_VOP3 *iFmt)
27086 : Inst_VOP3(iFmt, "v_frexp_exp_i32_f32", false)
27087 {
27088 setFlag(ALU);
27089 setFlag(F32);
27090 } // Inst_VOP3__V_FREXP_EXP_I32_F32
27091
27092 Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32()
27093 {
27094 } // ~Inst_VOP3__V_FREXP_EXP_I32_F32
27095
27096 // frexp(S0.f, Exponent(S0.f))
27097 // if (S0.f == INF || S0.f == NAN) then D.i = 0;
27098 // else D.i = Exponent(S0.f)
27099 void
27100 Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
27101 {
27102 Wavefront *wf = gpuDynInst->wavefront();
27103 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27104 VecOperandI32 vdst(gpuDynInst, instData.VDST);
27105
27106 src.readSrc();
27107
27108 if (instData.ABS & 0x1) {
27109 src.absModifier();
27110 }
27111
27112 if (extData.NEG & 0x1) {
27113 src.negModifier();
27114 }
27115
27116 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27117 if (wf->execMask(lane)) {
27118 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
27119 vdst[lane] = 0;
27120 } else {
27121 VecElemI32 exp(0);
27122 std::frexp(src[lane], &exp);
27123 vdst[lane] = exp;
27124 }
27125 }
27126 }
27127
27128 vdst.write();
27129 }
27130
27131 Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3 *iFmt)
27132 : Inst_VOP3(iFmt, "v_frexp_mant_f32", false)
27133 {
27134 setFlag(ALU);
27135 setFlag(F32);
27136 } // Inst_VOP3__V_FREXP_MANT_F32
27137
27138 Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32()
27139 {
27140 } // ~Inst_VOP3__V_FREXP_MANT_F32
27141
27142 // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
27143 // else D.f = Mantissa(S0.f).
27144 void
27145 Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
27146 {
27147 Wavefront *wf = gpuDynInst->wavefront();
27148 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27149 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27150
27151 src.readSrc();
27152
27153 if (instData.ABS & 0x1) {
27154 src.absModifier();
27155 }
27156
27157 if (extData.NEG & 0x1) {
27158 src.negModifier();
27159 }
27160
27161 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27162 if (wf->execMask(lane)) {
27163 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
27164 vdst[lane] = src[lane];
27165 } else {
27166 VecElemI32 exp(0);
27167 vdst[lane] = std::frexp(src[lane], &exp);
27168 }
27169 }
27170 }
27171
27172 vdst.write();
27173 }
27174
27175 Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3 *iFmt)
27176 : Inst_VOP3(iFmt, "v_clrexcp", false)
27177 {
27178 } // Inst_VOP3__V_CLREXCP
27179
27180 Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP()
27181 {
27182 } // ~Inst_VOP3__V_CLREXCP
27183
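// Clear wave's exception state in SIMD (SP).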
27184 void
27185 Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
27186 {
27187 panicUnimplemented();
27188 }
27189
27190 Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3 *iFmt)
27191 : Inst_VOP3(iFmt, "v_cvt_f16_u16", false)
27192 {
27193 setFlag(ALU);
27194 setFlag(F16);
27195 } // Inst_VOP3__V_CVT_F16_U16
27196
27197 Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16()
27198 {
27199 } // ~Inst_VOP3__V_CVT_F16_U16
27200
27201 // D.f16 = uint16_to_flt16(S.u16).
27202 void
27203 Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
27204 {
27205 panicUnimplemented();
27206 }
27207
27208 Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3 *iFmt)
27209 : Inst_VOP3(iFmt, "v_cvt_f16_i16", false)
27210 {
27211 setFlag(ALU);
27212 setFlag(F16);
27213 } // Inst_VOP3__V_CVT_F16_I16
27214
27215 Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16()
27216 {
27217 } // ~Inst_VOP3__V_CVT_F16_I16
27218
27219 // D.f16 = int16_to_flt16(S.i16).
27220 void
27221 Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
27222 {
27223 panicUnimplemented();
27224 }
27225
27226 Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3 *iFmt)
27227 : Inst_VOP3(iFmt, "v_cvt_u16_f16", false)
27228 {
27229 setFlag(ALU);
27230 setFlag(F16);
27231 } // Inst_VOP3__V_CVT_U16_F16
27232
27233 Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16()
27234 {
27235 } // ~Inst_VOP3__V_CVT_U16_F16
27236
27237 // D.u16 = flt16_to_uint16(S.f16).
27238 void
27239 Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
27240 {
27241 panicUnimplemented();
27242 }
27243
27244 Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3 *iFmt)
27245 : Inst_VOP3(iFmt, "v_cvt_i16_f16", false)
27246 {
27247 setFlag(ALU);
27248 setFlag(F16);
27249 } // Inst_VOP3__V_CVT_I16_F16
27250
27251 Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16()
27252 {
27253 } // ~Inst_VOP3__V_CVT_I16_F16
27254
27255 // D.i16 = flt16_to_int16(S.f16).
27256 void
27257 Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
27258 {
27259 panicUnimplemented();
27260 }
27261
27262 Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3 *iFmt)
27263 : Inst_VOP3(iFmt, "v_rcp_f16", false)
27264 {
27265 setFlag(ALU);
27266 setFlag(F16);
27267 } // Inst_VOP3__V_RCP_F16
27268
27269 Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16()
27270 {
27271 } // ~Inst_VOP3__V_RCP_F16
27272
27273 // if (S0.f16 == 1.0f)
27274 // D.f16 = 1.0f;
27275 // else
27276 // D.f16 = 1 / S0.f16.
27277 void
27278 Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
27279 {
27280 panicUnimplemented();
27281 }
27282
27283 Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3 *iFmt)
27284 : Inst_VOP3(iFmt, "v_sqrt_f16", false)
27285 {
27286 setFlag(ALU);
27287 setFlag(F16);
27288 } // Inst_VOP3__V_SQRT_F16
27289
27290 Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16()
27291 {
27292 } // ~Inst_VOP3__V_SQRT_F16
27293
27294 // if (S0.f16 == 1.0f)
27295 // D.f16 = 1.0f;
27296 // else
27297 // D.f16 = sqrt(S0.f16).
27298 void
27299 Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
27300 {
27301 panicUnimplemented();
27302 }
27303
27304 Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3 *iFmt)
27305 : Inst_VOP3(iFmt, "v_rsq_f16", false)
27306 {
27307 setFlag(ALU);
27308 setFlag(F16);
27309 } // Inst_VOP3__V_RSQ_F16
27310
27311 Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16()
27312 {
27313 } // ~Inst_VOP3__V_RSQ_F16
27314
27315 // if (S0.f16 == 1.0f)
27316 // D.f16 = 1.0f;
27317 // else
27318 // D.f16 = 1 / sqrt(S0.f16).
27319 void
27320 Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
27321 {
27322 panicUnimplemented();
27323 }
27324
27325 Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3 *iFmt)
27326 : Inst_VOP3(iFmt, "v_log_f16", false)
27327 {
27328 setFlag(ALU);
27329 setFlag(F16);
27330 } // Inst_VOP3__V_LOG_F16
27331
27332 Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
27333 {
27334 } // ~Inst_VOP3__V_LOG_F16
27335
27336 // if (S0.f16 == 1.0f)
27337 // D.f16 = 0.0f;
27338 // else
27339 // D.f16 = log2(S0.f16).
27340 void
27341 Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
27342 {
27343 panicUnimplemented();
27344 }
27345
27346 Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3 *iFmt)
27347 : Inst_VOP3(iFmt, "v_exp_f16", false)
27348 {
27349 setFlag(ALU);
27350 setFlag(F16);
27351 } // Inst_VOP3__V_EXP_F16
27352
27353 Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
27354 {
27355 } // ~Inst_VOP3__V_EXP_F16
27356
27357 // if (S0.f16 == 0.0f)
27358 // D.f16 = 1.0f;
27359 // else
27360 // D.f16 = pow(2.0, S0.f16).
27361 void
27362 Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
27363 {
27364 panicUnimplemented();
27365 }
27366
27367 Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3 *iFmt)
27368 : Inst_VOP3(iFmt, "v_frexp_mant_f16", false)
27369 {
27370 setFlag(ALU);
27371 setFlag(F16);
27372 } // Inst_VOP3__V_FREXP_MANT_F16
27373
27374 Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
27375 {
27376 } // ~Inst_VOP3__V_FREXP_MANT_F16
27377
27378 // if (S0.f16 == +-INF || S0.f16 == NAN)
27379 // D.f16 = S0.f16;
27380 // else
27381 // D.f16 = mantissa(S0.f16).
27382 void
27383 Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
27384 {
27385 panicUnimplemented();
27386 }
27387
27388 Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
27389 InFmt_VOP3 *iFmt)
27390 : Inst_VOP3(iFmt, "v_frexp_exp_i16_f16", false)
27391 {
27392 setFlag(ALU);
27393 setFlag(F16);
27394 } // Inst_VOP3__V_FREXP_EXP_I16_F16
27395
27396 Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
27397 {
27398 } // ~Inst_VOP3__V_FREXP_EXP_I16_F16
27399
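// See V_FREXP_EXP_I32_F32.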
27400 void
27401 Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
27402 {
27403 panicUnimplemented();
27404 }
27405
27406 Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3 *iFmt)
27407 : Inst_VOP3(iFmt, "v_floor_f16", false)
27408 {
27409 setFlag(ALU);
27410 setFlag(F16);
27411 } // Inst_VOP3__V_FLOOR_F16
27412
27413 Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
27414 {
27415 } // ~Inst_VOP3__V_FLOOR_F16
27416
27417 // D.f16 = floor(S0.f16);
27418 void
27419 Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
27420 {
27421 panicUnimplemented();
27422 }
27423
27424 Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3 *iFmt)
27425 : Inst_VOP3(iFmt, "v_ceil_f16", false)
27426 {
27427 setFlag(ALU);
27428 setFlag(F16);
27429 } // Inst_VOP3__V_CEIL_F16
27430
27431 Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
27432 {
27433 } // ~Inst_VOP3__V_CEIL_F16
27434
27435 // D.f16 = ceil(S0.f16);
27436 void
27437 Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
27438 {
27439 panicUnimplemented();
27440 }
27441
27442 Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3 *iFmt)
27443 : Inst_VOP3(iFmt, "v_trunc_f16", false)
27444 {
27445 setFlag(ALU);
27446 setFlag(F16);
27447 } // Inst_VOP3__V_TRUNC_F16
27448
27449 Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
27450 {
27451 } // ~Inst_VOP3__V_TRUNC_F16
27452
27453 // D.f16 = trunc(S0.f16).
27454 void
27455 Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
27456 {
27457 panicUnimplemented();
27458 }
27459
27460 Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3 *iFmt)
27461 : Inst_VOP3(iFmt, "v_rndne_f16", false)
27462 {
27463 setFlag(ALU);
27464 setFlag(F16);
27465 } // Inst_VOP3__V_RNDNE_F16
27466
27467 Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
27468 {
27469 } // ~Inst_VOP3__V_RNDNE_F16
27470
27471 // D.f16 = roundNearestEven(S0.f16);
27472 void
27473 Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
27474 {
27475 panicUnimplemented();
27476 }
27477
27478 Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3 *iFmt)
27479 : Inst_VOP3(iFmt, "v_fract_f16", false)
27480 {
27481 setFlag(ALU);
27482 setFlag(F16);
27483 } // Inst_VOP3__V_FRACT_F16
27484
27485 Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
27486 {
27487 } // ~Inst_VOP3__V_FRACT_F16
27488
27489 // D.f16 = S0.f16 + -floor(S0.f16).
27490 void
27491 Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
27492 {
27493 panicUnimplemented();
27494 }
27495
27496 Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3 *iFmt)
27497 : Inst_VOP3(iFmt, "v_sin_f16", false)
27498 {
27499 setFlag(ALU);
27500 setFlag(F16);
27501 } // Inst_VOP3__V_SIN_F16
27502
27503 Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
27504 {
27505 } // ~Inst_VOP3__V_SIN_F16
27506
27507 // D.f16 = sin(S0.f16 * 2 * PI).
27508 void
27509 Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
27510 {
27511 panicUnimplemented();
27512 }
27513
27514 Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3 *iFmt)
27515 : Inst_VOP3(iFmt, "v_cos_f16", false)
27516 {
27517 setFlag(ALU);
27518 setFlag(F16);
27519 } // Inst_VOP3__V_COS_F16
27520
27521 Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
27522 {
27523 } // ~Inst_VOP3__V_COS_F16
27524
27525 // D.f16 = cos(S0.f16 * 2 * PI).
27526 void
27527 Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
27528 {
27529 panicUnimplemented();
27530 }
27531
27532 Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3 *iFmt)
27533 : Inst_VOP3(iFmt, "v_exp_legacy_f32", false)
27534 {
27535 setFlag(ALU);
27536 setFlag(F32);
27537 } // Inst_VOP3__V_EXP_LEGACY_F32
27538
27539 Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32()
27540 {
27541 } // ~Inst_VOP3__V_EXP_LEGACY_F32
27542
27543 // D.f = pow(2.0, S0.f).
27544 void
27545 Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
27546 {
27547 Wavefront *wf = gpuDynInst->wavefront();
27548 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27549 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27550
27551 src.readSrc();
27552
27553 if (instData.ABS & 0x1) {
27554 src.absModifier();
27555 }
27556
27557 if (extData.NEG & 0x1) {
27558 src.negModifier();
27559 }
27560
27561 /**
27562 * input modifiers are supported by FP operations only
27563 */
27564 assert(!(instData.ABS & 0x2));
27565 assert(!(instData.ABS & 0x4));
27566 assert(!(extData.NEG & 0x2));
27567 assert(!(extData.NEG & 0x4));
27568
27569 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27570 if (wf->execMask(lane)) {
27571 vdst[lane] = std::pow(2.0, src[lane]);
27572 }
27573 }
27574
27575 vdst.write();
27576 }
27577
27578 Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3 *iFmt)
27579 : Inst_VOP3(iFmt, "v_log_legacy_f32", false)
27580 {
27581 setFlag(ALU);
27582 setFlag(F32);
27583 } // Inst_VOP3__V_LOG_LEGACY_F32
27584
27585 Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32()
27586 {
27587 } // ~Inst_VOP3__V_LOG_LEGACY_F32
27588
27589 // D.f = log2(S0.f).
27590 void
27591 Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
27592 {
27593 Wavefront *wf = gpuDynInst->wavefront();
27594 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27595 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27596
27597 src.readSrc();
27598
27599 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27600 if (wf->execMask(lane)) {
27601 vdst[lane] = std::log2(src[lane]);
27602 }
27603 }
27604
27605 vdst.write();
27606 }
27607
27608 Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3 *iFmt)
27609 : Inst_VOP3(iFmt, "v_mad_legacy_f32", false)
27610 {
27611 setFlag(ALU);
27612 setFlag(F32);
27613 setFlag(MAD);
27614 } // Inst_VOP3__V_MAD_LEGACY_F32
27615
27616 Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32()
27617 {
27618 } // ~Inst_VOP3__V_MAD_LEGACY_F32
27619
27620 // D.f = S0.f * S1.f + S2.f.
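// Note: on hardware the legacy variant follows DX9 multiply rules
// (0.0 * x = 0.0); the std::fma() below does not model that special
// case.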
27621 void
27622 Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
27623 {
27624 Wavefront *wf = gpuDynInst->wavefront();
27625 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
27626 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
27627 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
27628 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27629
27630 src0.readSrc();
27631 src1.readSrc();
27632 src2.readSrc();
27633
27634 if (instData.ABS & 0x1) {
27635 src0.absModifier();
27636 }
27637
27638 if (instData.ABS & 0x2) {
27639 src1.absModifier();
27640 }
27641
27642 if (instData.ABS & 0x4) {
27643 src2.absModifier();
27644 }
27645
27646 if (extData.NEG & 0x1) {
27647 src0.negModifier();
27648 }
27649
27650 if (extData.NEG & 0x2) {
27651 src1.negModifier();
27652 }
27653
27654 if (extData.NEG & 0x4) {
27655 src2.negModifier();
27656 }
27657
27658 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27659 if (wf->execMask(lane)) {
27660 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
27661 }
27662 }
27663
27664 vdst.write();
27665 }
27666
27667 Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3 *iFmt)
27668 : Inst_VOP3(iFmt, "v_mad_f32", false)
27669 {
27670 setFlag(ALU);
27671 setFlag(F32);
27672 setFlag(MAD);
27673 } // Inst_VOP3__V_MAD_F32
27674
27675 Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32()
27676 {
27677 } // ~Inst_VOP3__V_MAD_F32
27678
27679 // D.f = S0.f * S1.f + S2.f.
27680 void
27681 Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
27682 {
27683 Wavefront *wf = gpuDynInst->wavefront();
27684 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
27685 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
27686 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
27687 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27688
27689 src0.readSrc();
27690 src1.readSrc();
27691 src2.readSrc();
27692
27693 if (instData.ABS & 0x1) {
27694 src0.absModifier();
27695 }
27696
27697 if (instData.ABS & 0x2) {
27698 src1.absModifier();
27699 }
27700
27701 if (instData.ABS & 0x4) {
27702 src2.absModifier();
27703 }
27704
27705 if (extData.NEG & 0x1) {
27706 src0.negModifier();
27707 }
27708
27709 if (extData.NEG & 0x2) {
27710 src1.negModifier();
27711 }
27712
27713 if (extData.NEG & 0x4) {
27714 src2.negModifier();
27715 }
27716
27717 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27718 if (wf->execMask(lane)) {
27719 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
27720 }
27721 }
27722
27723 vdst.write();
27724 }
27725
27726 Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3 *iFmt)
27727 : Inst_VOP3(iFmt, "v_mad_i32_i24", false)
27728 {
27729 setFlag(ALU);
27730 setFlag(MAD);
27731 } // Inst_VOP3__V_MAD_I32_I24
27732
27733 Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
27734 {
27735 } // ~Inst_VOP3__V_MAD_I32_I24
27736
27737 // D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
27738 void
27739 Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
27740 {
27741 Wavefront *wf = gpuDynInst->wavefront();
27742 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
27743 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
27744 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
27745 VecOperandI32 vdst(gpuDynInst, instData.VDST);
27746
27747 src0.readSrc();
27748 src1.readSrc();
27749 src2.readSrc();
27750
27751 /**
27752 * input modifiers are supported by FP operations only
27753 */
27754 assert(!(instData.ABS & 0x1));
27755 assert(!(instData.ABS & 0x2));
27756 assert(!(instData.ABS & 0x4));
27757 assert(!(extData.NEG & 0x1));
27758 assert(!(extData.NEG & 0x2));
27759 assert(!(extData.NEG & 0x4));
27760
27761 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27762 if (wf->execMask(lane)) {
27763 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
27764 * sext<24>(bits(src1[lane], 23, 0)) + src2[lane];
27765 }
27766 }
27767
27768 vdst.write();
27769 }
27770
27771 Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3 *iFmt)
27772 : Inst_VOP3(iFmt, "v_mad_u32_u24", false)
27773 {
27774 setFlag(ALU);
27775 setFlag(MAD);
27776 } // Inst_VOP3__V_MAD_U32_U24
27777
27778 Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
27779 {
27780 } // ~Inst_VOP3__V_MAD_U32_U24
27781
27782 // D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
27783 void
27784 Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
27785 {
27786 Wavefront *wf = gpuDynInst->wavefront();
27787 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
27788 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27789 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27790 VecOperandU32 vdst(gpuDynInst, instData.VDST);
27791
27792 src0.readSrc();
27793 src1.readSrc();
27794 src2.readSrc();
27795
27796 /**
27797 * input modifiers are supported by FP operations only
27798 */
27799 assert(!(instData.ABS & 0x1));
27800 assert(!(instData.ABS & 0x2));
27801 assert(!(instData.ABS & 0x4));
27802 assert(!(extData.NEG & 0x1));
27803 assert(!(extData.NEG & 0x2));
27804 assert(!(extData.NEG & 0x4));
27805
27806 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27807 if (wf->execMask(lane)) {
27808 vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
27809 + src2[lane];
27810 }
27811 }
27812
27813 vdst.write();
27814 }
27815
27816 Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3 *iFmt)
27817 : Inst_VOP3(iFmt, "v_cubeid_f32", false)
27818 {
27819 setFlag(ALU);
27820 setFlag(F32);
27821 } // Inst_VOP3__V_CUBEID_F32
27822
27823 Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
27824 {
27825 } // ~Inst_VOP3__V_CUBEID_F32
27826
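// D.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). XYZ coordinate is
// given in (S0.f, S1.f, S2.f).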
27827 void
27828 Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
27829 {
27830 panicUnimplemented();
27831 }
27832
27833 Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3 *iFmt)
27834 : Inst_VOP3(iFmt, "v_cubesc_f32", false)
27835 {
27836 setFlag(ALU);
27837 setFlag(F32);
27838 } // Inst_VOP3__V_CUBESC_F32
27839
27840 Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
27841 {
27842 } // ~Inst_VOP3__V_CUBESC_F32
27843
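// D.f = cubemap S coordinate. XYZ coordinate is given in
// (S0.f, S1.f, S2.f).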
27844 void
27845 Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
27846 {
27847 panicUnimplemented();
27848 }
27849
27850 Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3 *iFmt)
27851 : Inst_VOP3(iFmt, "v_cubetc_f32", false)
27852 {
27853 setFlag(ALU);
27854 setFlag(F32);
27855 } // Inst_VOP3__V_CUBETC_F32
27856
27857 Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
27858 {
27859 } // ~Inst_VOP3__V_CUBETC_F32
27860
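// D.f = cubemap T coordinate. XYZ coordinate is given in
// (S0.f, S1.f, S2.f).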
27861 void
27862 Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
27863 {
27864 panicUnimplemented();
27865 }
27866
27867 Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3 *iFmt)
27868 : Inst_VOP3(iFmt, "v_cubema_f32", false)
27869 {
27870 setFlag(ALU);
27871 setFlag(F32);
27872 } // Inst_VOP3__V_CUBEMA_F32
27873
27874 Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
27875 {
27876 } // ~Inst_VOP3__V_CUBEMA_F32
27877
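// D.f = 2.0 * cubemap major axis. XYZ coordinate is given in
// (S0.f, S1.f, S2.f).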
27878 void
27879 Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
27880 {
27881 panicUnimplemented();
27882 }
27883
27884 Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3 *iFmt)
27885 : Inst_VOP3(iFmt, "v_bfe_u32", false)
27886 {
27887 setFlag(ALU);
27888 } // Inst_VOP3__V_BFE_U32
27889
27890 Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
27891 {
27892 } // ~Inst_VOP3__V_BFE_U32
27893
27894 // D.u = (S0.u >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
27895 // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
27896 void
27897 Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
27898 {
27899 Wavefront *wf = gpuDynInst->wavefront();
27900 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
27901 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27902 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27903 VecOperandU32 vdst(gpuDynInst, instData.VDST);
27904
27905 src0.readSrc();
27906 src1.readSrc();
27907 src2.readSrc();
27908
27909 /**
27910 * input modifiers are supported by FP operations only
27911 */
27912 assert(!(instData.ABS & 0x1));
27913 assert(!(instData.ABS & 0x2));
27914 assert(!(instData.ABS & 0x4));
27915 assert(!(extData.NEG & 0x1));
27916 assert(!(extData.NEG & 0x2));
27917 assert(!(extData.NEG & 0x4));
27918
27919 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27920 if (wf->execMask(lane)) {
27921 vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
27922 & ((1U << bits(src2[lane], 4, 0)) - 1);
27923 }
27924 }
27925
27926 vdst.write();
27927 }
27928
27929 Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3 *iFmt)
27930 : Inst_VOP3(iFmt, "v_bfe_i32", false)
27931 {
27932 setFlag(ALU);
27933 } // Inst_VOP3__V_BFE_I32
27934
27935 Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
27936 {
27937 } // ~Inst_VOP3__V_BFE_I32
27938
27939 // D.i = (S0.i >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
27940 // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
27941 void
27942 Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
27943 {
27944 Wavefront *wf = gpuDynInst->wavefront();
27945 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
27946 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27947 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27948 VecOperandI32 vdst(gpuDynInst, instData.VDST);
27949
27950 src0.readSrc();
27951 src1.readSrc();
27952 src2.readSrc();
27953
27954 /**
27955 * input modifiers are supported by FP operations only
27956 */
27957 assert(!(instData.ABS & 0x1));
27958 assert(!(instData.ABS & 0x2));
27959 assert(!(instData.ABS & 0x4));
27960 assert(!(extData.NEG & 0x1));
27961 assert(!(extData.NEG & 0x2));
27962 assert(!(extData.NEG & 0x4));
27963
27964 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27965 if (wf->execMask(lane)) {
27966 vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
27967 & ((1U << bits(src2[lane], 4, 0)) - 1);
27968 }
27969 }
27970
27971 vdst.write();
27972 }
27973
27974 Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3 *iFmt)
27975 : Inst_VOP3(iFmt, "v_bfi_b32", false)
27976 {
27977 setFlag(ALU);
27978 } // Inst_VOP3__V_BFI_B32
27979
27980 Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
27981 {
27982 } // ~Inst_VOP3__V_BFI_B32
27983
27984 // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
27985 void
27986 Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
27987 {
27988 Wavefront *wf = gpuDynInst->wavefront();
27989 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
27990 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27991 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27992 VecOperandU32 vdst(gpuDynInst, instData.VDST);
27993
27994 src0.readSrc();
27995 src1.readSrc();
27996 src2.readSrc();
27997
27998 /**
27999 * input modifiers are supported by FP operations only
28000 */
28001 assert(!(instData.ABS & 0x1));
28002 assert(!(instData.ABS & 0x2));
28003 assert(!(instData.ABS & 0x4));
28004 assert(!(extData.NEG & 0x1));
28005 assert(!(extData.NEG & 0x2));
28006 assert(!(extData.NEG & 0x4));
28007
28008 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28009 if (wf->execMask(lane)) {
28010 vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
28011 & src2[lane]);
28012 }
28013 }
28014
28015 vdst.write();
28016 }
28017
28018 Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3 *iFmt)
28019 : Inst_VOP3(iFmt, "v_fma_f32", false)
28020 {
28021 setFlag(ALU);
28022 setFlag(F32);
28023 setFlag(FMA);
28024 } // Inst_VOP3__V_FMA_F32
28025
28026 Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
28027 {
28028 } // ~Inst_VOP3__V_FMA_F32
28029
28030 // D.f = S0.f * S1.f + S2.f.
28031 void
28032 Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
28033 {
28034 Wavefront *wf = gpuDynInst->wavefront();
28035 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28036 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28037 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28038 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28039
28040 src0.readSrc();
28041 src1.readSrc();
28042 src2.readSrc();
28043
28044 if (instData.ABS & 0x1) {
28045 src0.absModifier();
28046 }
28047
28048 if (instData.ABS & 0x2) {
28049 src1.absModifier();
28050 }
28051
28052 if (instData.ABS & 0x4) {
28053 src2.absModifier();
28054 }
28055
28056 if (extData.NEG & 0x1) {
28057 src0.negModifier();
28058 }
28059
28060 if (extData.NEG & 0x2) {
28061 src1.negModifier();
28062 }
28063
28064 if (extData.NEG & 0x4) {
28065 src2.negModifier();
28066 }
28067
28068 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28069 if (wf->execMask(lane)) {
28070 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
28071 }
28072 }
28073
28074 vdst.write();
28075 }
28076
28077 Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3 *iFmt)
28078 : Inst_VOP3(iFmt, "v_fma_f64", false)
28079 {
28080 setFlag(ALU);
28081 setFlag(F64);
28082 setFlag(FMA);
28083 } // Inst_VOP3__V_FMA_F64
28084
28085 Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
28086 {
28087 } // ~Inst_VOP3__V_FMA_F64
28088
28089 // D.d = S0.d * S1.d + S2.d.
28090 void
28091 Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
28092 {
28093 Wavefront *wf = gpuDynInst->wavefront();
28094 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
28095 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
28096 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
28097 VecOperandF64 vdst(gpuDynInst, instData.VDST);
28098
28099 src0.readSrc();
28100 src1.readSrc();
28101 src2.readSrc();
28102
28103 if (instData.ABS & 0x1) {
28104 src0.absModifier();
28105 }
28106
28107 if (instData.ABS & 0x2) {
28108 src1.absModifier();
28109 }
28110
28111 if (instData.ABS & 0x4) {
28112 src2.absModifier();
28113 }
28114
28115 if (extData.NEG & 0x1) {
28116 src0.negModifier();
28117 }
28118
28119 if (extData.NEG & 0x2) {
28120 src1.negModifier();
28121 }
28122
28123 if (extData.NEG & 0x4) {
28124 src2.negModifier();
28125 }
28126
28127 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28128 if (wf->execMask(lane)) {
28129 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
28130 }
28131 }
28132
28133 vdst.write();
28134 }
28135
28136 Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3 *iFmt)
28137 : Inst_VOP3(iFmt, "v_lerp_u8", false)
28138 {
28139 setFlag(ALU);
28140 } // Inst_VOP3__V_LERP_U8
28141
28142 Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
28143 {
28144 } // ~Inst_VOP3__V_LERP_U8
28145
28146 // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24
28147 // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
28148 // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
28149 // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
28150 void
28151 Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
28152 {
28153 Wavefront *wf = gpuDynInst->wavefront();
28154 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28155 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28156 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28157 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28158
28159 src0.readSrc();
28160 src1.readSrc();
28161 src2.readSrc();
28162
28163 /**
28164 * input modifiers are supported by FP operations only
28165 */
28166 assert(!(instData.ABS & 0x1));
28167 assert(!(instData.ABS & 0x2));
28168 assert(!(instData.ABS & 0x4));
28169 assert(!(extData.NEG & 0x1));
28170 assert(!(extData.NEG & 0x2));
28171 assert(!(extData.NEG & 0x4));
28172
28173 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28174 if (wf->execMask(lane)) {
28175 vdst[lane] = ((bits(src0[lane], 31, 24)
28176 + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
28177 << 24;
28178 vdst[lane] += ((bits(src0[lane], 23, 16)
28179 + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
28180 << 16;
28181 vdst[lane] += ((bits(src0[lane], 15, 8)
28182 + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
28183 << 8;
28184 vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
28185 + bits(src2[lane], 0)) >> 1);
28186 }
28187 }
28188
28189 vdst.write();
28190 }
28191
28192 Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3 *iFmt)
28193 : Inst_VOP3(iFmt, "v_alignbit_b32", false)
28194 {
28195 setFlag(ALU);
28196 } // Inst_VOP3__V_ALIGNBIT_B32
28197
28198 Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
28199 {
28200 } // ~Inst_VOP3__V_ALIGNBIT_B32
28201
28202 // D.u = ({S0, S1} >> S2.u[4:0]) & 0xffffffff.
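// This is a funnel shift: the two sources are concatenated into a 64-bit
// value {S0, S1} and shifted right by S2.u[4:0] bits. For example
// (illustrative values), S0 = 0x00000001, S1 = 0x00000000 and a shift of
// 4 yield 0x0000000100000000 >> 4, so D = 0x10000000.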
28203 void
28204 Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
28205 {
28206 Wavefront *wf = gpuDynInst->wavefront();
28207 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28208 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28209 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28210 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28211
28212 src0.readSrc();
28213 src1.readSrc();
28214 src2.readSrc();
28215
28216 /**
28217 * input modifiers are supported by FP operations only
28218 */
28219 assert(!(instData.ABS & 0x1));
28220 assert(!(instData.ABS & 0x2));
28221 assert(!(instData.ABS & 0x4));
28222 assert(!(extData.NEG & 0x1));
28223 assert(!(extData.NEG & 0x2));
28224 assert(!(extData.NEG & 0x4));
28225
28226 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28227 if (wf->execMask(lane)) {
28228 VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
28229 | (VecElemU64)src1[lane]);
28230 vdst[lane] = (VecElemU32)((src_0_1
28231 >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
28232 }
28233 }
28234
28235 vdst.write();
28236 }
28237
28238 Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3 *iFmt)
28239 : Inst_VOP3(iFmt, "v_alignbyte_b32", false)
28240 {
28241 setFlag(ALU);
28242 } // Inst_VOP3__V_ALIGNBYTE_B32
28243
28244 Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
28245 {
28246 } // ~Inst_VOP3__V_ALIGNBYTE_B32
28247
28248 // D.u = ({S0, S1} >> (8 * S2.u[4:0])) & 0xffffffff.
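// Same funnel shift as v_alignbit_b32, but at byte granularity. For
// example (illustrative values), S0 = 0x11223344, S1 = 0x55667788 and
// S2.u[4:0] = 2 shift {S0, S1} right by 16 bits, so D = 0x33445566.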
28249 void
28250 Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
28251 {
28252 Wavefront *wf = gpuDynInst->wavefront();
28253 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28254 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28255 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28256 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28257
28258 src0.readSrc();
28259 src1.readSrc();
28260 src2.readSrc();
28261
28262 /**
28263 * input modifiers are supported by FP operations only
28264 */
28265 assert(!(instData.ABS & 0x1));
28266 assert(!(instData.ABS & 0x2));
28267 assert(!(instData.ABS & 0x4));
28268 assert(!(extData.NEG & 0x1));
28269 assert(!(extData.NEG & 0x2));
28270 assert(!(extData.NEG & 0x4));
28271
28272 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28273 if (wf->execMask(lane)) {
28274 VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
28275 | (VecElemU64)src1[lane]);
28276 vdst[lane] = (VecElemU32)((src_0_1
28277 >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0)))
28278 & 0xffffffff);
28279 }
28280 }
28281
28282 vdst.write();
28283 }
28284
28285 Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3 *iFmt)
28286 : Inst_VOP3(iFmt, "v_min3_f32", false)
28287 {
28288 setFlag(ALU);
28289 setFlag(F32);
28290 } // Inst_VOP3__V_MIN3_F32
28291
28292 Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
28293 {
28294 } // ~Inst_VOP3__V_MIN3_F32
28295
28296 // D.f = min(S0.f, S1.f, S2.f).
28297 void
28298 Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
28299 {
28300 Wavefront *wf = gpuDynInst->wavefront();
28301 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28302 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28303 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28304 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28305
28306 src0.readSrc();
28307 src1.readSrc();
28308 src2.readSrc();
28309
28310 if (instData.ABS & 0x1) {
28311 src0.absModifier();
28312 }
28313
28314 if (instData.ABS & 0x2) {
28315 src1.absModifier();
28316 }
28317
28318 if (instData.ABS & 0x4) {
28319 src2.absModifier();
28320 }
28321
28322 if (extData.NEG & 0x1) {
28323 src0.negModifier();
28324 }
28325
28326 if (extData.NEG & 0x2) {
28327 src1.negModifier();
28328 }
28329
28330 if (extData.NEG & 0x4) {
28331 src2.negModifier();
28332 }
28333
28334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28335 if (wf->execMask(lane)) {
28336 VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
28337 vdst[lane] = std::fmin(min_0_1, src2[lane]);
28338 }
28339 }
28340
28341 vdst.write();
28342 }
28343
28344 Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3 *iFmt)
28345 : Inst_VOP3(iFmt, "v_min3_i32", false)
28346 {
28347 setFlag(ALU);
28348 } // Inst_VOP3__V_MIN3_I32
28349
28350 Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
28351 {
28352 } // ~Inst_VOP3__V_MIN3_I32
28353
28354 // D.i = min(S0.i, S1.i, S2.i).
28355 void
28356 Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
28357 {
28358 Wavefront *wf = gpuDynInst->wavefront();
28359 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28360 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28361 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
28362 VecOperandI32 vdst(gpuDynInst, instData.VDST);
28363
28364 src0.readSrc();
28365 src1.readSrc();
28366 src2.readSrc();
28367
28368 /**
28369 * input modifiers are supported by FP operations only
28370 */
28371 assert(!(instData.ABS & 0x1));
28372 assert(!(instData.ABS & 0x2));
28373 assert(!(instData.ABS & 0x4));
28374 assert(!(extData.NEG & 0x1));
28375 assert(!(extData.NEG & 0x2));
28376 assert(!(extData.NEG & 0x4));
28377
28378 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28379 if (wf->execMask(lane)) {
28380 VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
28381 vdst[lane] = std::min(min_0_1, src2[lane]);
28382 }
28383 }
28384
28385 vdst.write();
28386 }
28387
28388 Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3 *iFmt)
28389 : Inst_VOP3(iFmt, "v_min3_u32", false)
28390 {
28391 setFlag(ALU);
28392 } // Inst_VOP3__V_MIN3_U32
28393
28394 Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
28395 {
28396 } // ~Inst_VOP3__V_MIN3_U32
28397
28398 // D.u = min(S0.u, S1.u, S2.u).
28399 void
28400 Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
28401 {
28402 Wavefront *wf = gpuDynInst->wavefront();
28403 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28404 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28405 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28406 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28407
28408 src0.readSrc();
28409 src1.readSrc();
28410 src2.readSrc();
28411
28412 /**
28413 * input modifiers are supported by FP operations only
28414 */
28415 assert(!(instData.ABS & 0x1));
28416 assert(!(instData.ABS & 0x2));
28417 assert(!(instData.ABS & 0x4));
28418 assert(!(extData.NEG & 0x1));
28419 assert(!(extData.NEG & 0x2));
28420 assert(!(extData.NEG & 0x4));
28421
28422 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28423 if (wf->execMask(lane)) {
28424 VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
28425 vdst[lane] = std::min(min_0_1, src2[lane]);
28426 }
28427 }
28428
28429 vdst.write();
28430 }
28431
28432 Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3 *iFmt)
28433 : Inst_VOP3(iFmt, "v_max3_f32", false)
28434 {
28435 setFlag(ALU);
28436 setFlag(F32);
28437 } // Inst_VOP3__V_MAX3_F32
28438
28439 Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
28440 {
28441 } // ~Inst_VOP3__V_MAX3_F32
28442
28443 // D.f = max(S0.f, S1.f, S2.f).
28444 void
28445 Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst)
28446 {
28447 Wavefront *wf = gpuDynInst->wavefront();
28448 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28449 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28450 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28451 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28452
28453 src0.readSrc();
28454 src1.readSrc();
28455 src2.readSrc();
28456
28457 if (instData.ABS & 0x1) {
28458 src0.absModifier();
28459 }
28460
28461 if (instData.ABS & 0x2) {
28462 src1.absModifier();
28463 }
28464
28465 if (instData.ABS & 0x4) {
28466 src2.absModifier();
28467 }
28468
28469 if (extData.NEG & 0x1) {
28470 src0.negModifier();
28471 }
28472
28473 if (extData.NEG & 0x2) {
28474 src1.negModifier();
28475 }
28476
28477 if (extData.NEG & 0x4) {
28478 src2.negModifier();
28479 }
28480
28481 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28482 if (wf->execMask(lane)) {
28483 VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
28484 vdst[lane] = std::fmax(max_0_1, src2[lane]);
28485 }
28486 }
28487
28488 vdst.write();
28489 }
28490
28491 Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3 *iFmt)
28492 : Inst_VOP3(iFmt, "v_max3_i32", false)
28493 {
28494 setFlag(ALU);
28495 } // Inst_VOP3__V_MAX3_I32
28496
28497 Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32()
28498 {
28499 } // ~Inst_VOP3__V_MAX3_I32
28500
28501 // D.i = max(S0.i, S1.i, S2.i).
28502 void
28503 Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst)
28504 {
28505 Wavefront *wf = gpuDynInst->wavefront();
28506 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28507 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28508 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
28509 VecOperandI32 vdst(gpuDynInst, instData.VDST);
28510
28511 src0.readSrc();
28512 src1.readSrc();
28513 src2.readSrc();
28514
28515 /**
28516 * input modifiers are supported by FP operations only
28517 */
28518 assert(!(instData.ABS & 0x1));
28519 assert(!(instData.ABS & 0x2));
28520 assert(!(instData.ABS & 0x4));
28521 assert(!(extData.NEG & 0x1));
28522 assert(!(extData.NEG & 0x2));
28523 assert(!(extData.NEG & 0x4));
28524
28525 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28526 if (wf->execMask(lane)) {
28527 VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
28528 vdst[lane] = std::max(max_0_1, src2[lane]);
28529 }
28530 }
28531
28532 vdst.write();
28533 }
28534
28535 Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3 *iFmt)
28536 : Inst_VOP3(iFmt, "v_max3_u32", false)
28537 {
28538 setFlag(ALU);
28539 } // Inst_VOP3__V_MAX3_U32
28540
28541 Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32()
28542 {
28543 } // ~Inst_VOP3__V_MAX3_U32
28544
28545 // D.u = max(S0.u, S1.u, S2.u).
28546 void
28547 Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst)
28548 {
28549 Wavefront *wf = gpuDynInst->wavefront();
28550 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28551 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28552 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28553 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28554
28555 src0.readSrc();
28556 src1.readSrc();
28557 src2.readSrc();
28558
28559 /**
28560 * input modifiers are supported by FP operations only
28561 */
28562 assert(!(instData.ABS & 0x1));
28563 assert(!(instData.ABS & 0x2));
28564 assert(!(instData.ABS & 0x4));
28565 assert(!(extData.NEG & 0x1));
28566 assert(!(extData.NEG & 0x2));
28567 assert(!(extData.NEG & 0x4));
28568
28569 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28570 if (wf->execMask(lane)) {
28571 VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
28572 vdst[lane] = std::max(max_0_1, src2[lane]);
28573 }
28574 }
28575
28576 vdst.write();
28577 }
28578
28579 Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3 *iFmt)
28580 : Inst_VOP3(iFmt, "v_med3_f32", false)
28581 {
28582 setFlag(ALU);
28583 setFlag(F32);
28584 } // Inst_VOP3__V_MED3_F32
28585
28586 Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32()
28587 {
28588 } // ~Inst_VOP3__V_MED3_F32
28589
28590 // D.f = median(S0.f, S1.f, S2.f).
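// median() is a helper (see inst_util.hh) returning the middle of the
// three values, e.g. median(1.0f, 3.0f, 2.0f) == 2.0f; it is equivalent
// to max(min(a, b), min(max(a, b), c)).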
28591 void
28592 Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
28593 {
28594 Wavefront *wf = gpuDynInst->wavefront();
28595 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28596 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28597 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28598 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28599
28600 src0.readSrc();
28601 src1.readSrc();
28602 src2.readSrc();
28603
28604 if (instData.ABS & 0x1) {
28605 src0.absModifier();
28606 }
28607
28608 if (instData.ABS & 0x2) {
28609 src1.absModifier();
28610 }
28611
28612 if (instData.ABS & 0x4) {
28613 src2.absModifier();
28614 }
28615
28616 if (extData.NEG & 0x1) {
28617 src0.negModifier();
28618 }
28619
28620 if (extData.NEG & 0x2) {
28621 src1.negModifier();
28622 }
28623
28624 if (extData.NEG & 0x4) {
28625 src2.negModifier();
28626 }
28627
28628 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28629 if (wf->execMask(lane)) {
28630 vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
28631 }
28632 }
28633
28634 vdst.write();
28635 }
28636
28637 Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3 *iFmt)
28638 : Inst_VOP3(iFmt, "v_med3_i32", false)
28639 {
28640 setFlag(ALU);
28641 } // Inst_VOP3__V_MED3_I32
28642
28643 Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
28644 {
28645 } // ~Inst_VOP3__V_MED3_I32
28646
28647 // D.i = median(S0.i, S1.i, S2.i).
28648 void
28649 Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
28650 {
28651 Wavefront *wf = gpuDynInst->wavefront();
28652 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28653 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28654 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
28655 VecOperandI32 vdst(gpuDynInst, instData.VDST);
28656
28657 src0.readSrc();
28658 src1.readSrc();
28659 src2.readSrc();
28660
28661 /**
28662 * input modifiers are supported by FP operations only
28663 */
28664 assert(!(instData.ABS & 0x1));
28665 assert(!(instData.ABS & 0x2));
28666 assert(!(instData.ABS & 0x4));
28667 assert(!(extData.NEG & 0x1));
28668 assert(!(extData.NEG & 0x2));
28669 assert(!(extData.NEG & 0x4));
28670
28671 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28672 if (wf->execMask(lane)) {
28673 vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
28674 }
28675 }
28676
28677 vdst.write();
28678 }
28679
28680 Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3 *iFmt)
28681 : Inst_VOP3(iFmt, "v_med3_u32", false)
28682 {
28683 setFlag(ALU);
28684 } // Inst_VOP3__V_MED3_U32
28685
28686 Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
28687 {
28688 } // ~Inst_VOP3__V_MED3_U32
28689
28690 // D.u = median(S0.u, S1.u, S2.u).
28691 void
28692 Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst)
28693 {
28694 Wavefront *wf = gpuDynInst->wavefront();
28695 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28696 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28697 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28698 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28699
28700 src0.readSrc();
28701 src1.readSrc();
28702 src2.readSrc();
28703
28704 /**
28705 * input modifiers are supported by FP operations only
28706 */
28707 assert(!(instData.ABS & 0x1));
28708 assert(!(instData.ABS & 0x2));
28709 assert(!(instData.ABS & 0x4));
28710 assert(!(extData.NEG & 0x1));
28711 assert(!(extData.NEG & 0x2));
28712 assert(!(extData.NEG & 0x4));
28713
28714 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28715 if (wf->execMask(lane)) {
28716 vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
28717 }
28718 }
28719
28720 vdst.write();
28721 }
28722
28723 Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3 *iFmt)
28724 : Inst_VOP3(iFmt, "v_sad_u8", false)
28725 {
28726 setFlag(ALU);
28727 } // Inst_VOP3__V_SAD_U8
28728
28729 Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8()
28730 {
28731 } // ~Inst_VOP3__V_SAD_U8
28732
28733 // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) +
28734 // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u.
28735 // Sum of absolute differences with accumulation, overflow into upper bits
28736 // is allowed.
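// Worked example (illustrative values): S0 = 0x01020304, S1 = 0x04030201
// and S2 = 0 give |1-4| + |2-3| + |3-2| + |4-1| = 8, so D = 8.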
28737 void
28738 Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
28739 {
28740 Wavefront *wf = gpuDynInst->wavefront();
28741 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28742 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28743 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28744 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28745
28746 src0.readSrc();
28747 src1.readSrc();
28748 src2.readSrc();
28749
28750 /**
28751 * input modifiers are supported by FP operations only
28752 */
28753 assert(!(instData.ABS & 0x1));
28754 assert(!(instData.ABS & 0x2));
28755 assert(!(instData.ABS & 0x4));
28756 assert(!(extData.NEG & 0x1));
28757 assert(!(extData.NEG & 0x2));
28758 assert(!(extData.NEG & 0x4));
28759
28760 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28761 if (wf->execMask(lane)) {
28762 vdst[lane] = std::abs(bits(src0[lane], 31, 24)
28763 - bits(src1[lane], 31, 24))
28764 + std::abs(bits(src0[lane], 23, 16)
28765 - bits(src1[lane], 23, 16))
28766 + std::abs(bits(src0[lane], 15, 8)
28767 - bits(src1[lane], 15, 8))
28768 + std::abs(bits(src0[lane], 7, 0)
28769 - bits(src1[lane], 7, 0)) + src2[lane];
28770 }
28771 }
28772
28773 vdst.write();
28774 }
28775
28776 Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3 *iFmt)
28777 : Inst_VOP3(iFmt, "v_sad_hi_u8", false)
28778 {
28779 setFlag(ALU);
28780 } // Inst_VOP3__V_SAD_HI_U8
28781
28782 Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
28783 {
28784 } // ~Inst_VOP3__V_SAD_HI_U8
28785
28786 // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
28787 // Sum of absolute differences with accumulation, overflow is lost.
28788 void
28789 Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst)
28790 {
28791 Wavefront *wf = gpuDynInst->wavefront();
28792 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28793 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28794 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28795 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28796
28797 src0.readSrc();
28798 src1.readSrc();
28799 src2.readSrc();
28800
28801 /**
28802 * input modifiers are supported by FP operations only
28803 */
28804 assert(!(instData.ABS & 0x1));
28805 assert(!(instData.ABS & 0x2));
28806 assert(!(instData.ABS & 0x4));
28807 assert(!(extData.NEG & 0x1));
28808 assert(!(extData.NEG & 0x2));
28809 assert(!(extData.NEG & 0x4));
28810
28811 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28812 if (wf->execMask(lane)) {
vdst[lane] = ((std::abs((VecElemI32)bits(src0[lane], 31, 24)
    - (VecElemI32)bits(src1[lane], 31, 24))
    + std::abs((VecElemI32)bits(src0[lane], 23, 16)
    - (VecElemI32)bits(src1[lane], 23, 16))
    + std::abs((VecElemI32)bits(src0[lane], 15, 8)
    - (VecElemI32)bits(src1[lane], 15, 8))
    + std::abs((VecElemI32)bits(src0[lane], 7, 0)
    - (VecElemI32)bits(src1[lane], 7, 0))) << 16)
    + src2[lane];
28818 }
28819 }
28820
28821 vdst.write();
28822 }
28823
28824 Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3 *iFmt)
28825 : Inst_VOP3(iFmt, "v_sad_u16", false)
28826 {
28827 setFlag(ALU);
28828 } // Inst_VOP3__V_SAD_U16
28829
28830 Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16()
28831 {
28832 } // ~Inst_VOP3__V_SAD_U16
28833
28834 // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0])
28835 // + S2.u.
28836 // Word SAD with accumulation.
28837 void
28838 Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
28839 {
28840 Wavefront *wf = gpuDynInst->wavefront();
28841 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28842 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28843 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28844 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28845
28846 src0.readSrc();
28847 src1.readSrc();
28848 src2.readSrc();
28849
28850 /**
28851 * input modifiers are supported by FP operations only
28852 */
28853 assert(!(instData.ABS & 0x1));
28854 assert(!(instData.ABS & 0x2));
28855 assert(!(instData.ABS & 0x4));
28856 assert(!(extData.NEG & 0x1));
28857 assert(!(extData.NEG & 0x2));
28858 assert(!(extData.NEG & 0x4));
28859
28860 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28861 if (wf->execMask(lane)) {
28862 vdst[lane] = std::abs(bits(src0[lane], 31, 16)
28863 - bits(src1[lane], 31, 16))
28864 + std::abs(bits(src0[lane], 15, 0)
28865 - bits(src1[lane], 15, 0)) + src2[lane];
28866 }
28867 }
28868
28869 vdst.write();
28870 }
28871
28872 Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3 *iFmt)
28873 : Inst_VOP3(iFmt, "v_sad_u32", false)
28874 {
28875 setFlag(ALU);
28876 } // Inst_VOP3__V_SAD_U32
28877
28878 Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
28879 {
28880 } // ~Inst_VOP3__V_SAD_U32
28881
28882 // D.u = abs(S0.i - S1.i) + S2.u.
28883 // Dword SAD with accumulation.
28884 void
28885 Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst)
28886 {
28887 Wavefront *wf = gpuDynInst->wavefront();
28888 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28889 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28890 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28891 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28892
28893 src0.readSrc();
28894 src1.readSrc();
28895 src2.readSrc();
28896
28897 /**
28898 * input modifiers are supported by FP operations only
28899 */
28900 assert(!(instData.ABS & 0x1));
28901 assert(!(instData.ABS & 0x2));
28902 assert(!(instData.ABS & 0x4));
28903 assert(!(extData.NEG & 0x1));
28904 assert(!(extData.NEG & 0x2));
28905 assert(!(extData.NEG & 0x4));
28906
28907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28908 if (wf->execMask(lane)) {
28909 vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
28910 }
28911 }
28912
28913 vdst.write();
28914 }
28915
28916 Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3 *iFmt)
28917 : Inst_VOP3(iFmt, "v_cvt_pk_u8_f32", false)
28918 {
28919 setFlag(ALU);
28920 setFlag(F32);
28921 } // Inst_VOP3__V_CVT_PK_U8_F32
28922
28923 Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32()
28924 {
28925 } // ~Inst_VOP3__V_CVT_PK_U8_F32
28926
28927 // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0]))
28928 // | (S2.u & ~(0xff << (8 * S1.u[1:0]))).
28929 // Convert floating point value S0 to 8-bit unsigned integer and pack the
28930 // result into byte S1 of dword S2.
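// For example (illustrative values), S0 = 255.0f, S1.u[1:0] = 1 and
// S2 = 0x0 convert S0 to 0xff and place it in byte 1, so D = 0x0000ff00.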
28931 void
28932 Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
28933 {
28934 Wavefront *wf = gpuDynInst->wavefront();
28935 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28936 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28937 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28938 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28939
28940 src0.readSrc();
28941 src1.readSrc();
28942 src2.readSrc();
28943
28944 if (instData.ABS & 0x1) {
28945 src0.absModifier();
28946 }
28947
28949 if (extData.NEG & 0x1) {
28950 src0.negModifier();
28951 }
28952
28953 /**
28954 * input modifiers are supported by FP operations only
28955 */
28956 assert(!(instData.ABS & 0x2));
28957 assert(!(instData.ABS & 0x4));
28958 assert(!(extData.NEG & 0x2));
28959 assert(!(extData.NEG & 0x4));
28960
28961 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28962 if (wf->execMask(lane)) {
28963 vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
28964 << (8 * bits(src1[lane], 1, 0)))
28965 | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
28966 }
28967 }
28968
28969 vdst.write();
28970 }
28971
28972 Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3 *iFmt)
28973 : Inst_VOP3(iFmt, "v_div_fixup_f32", false)
28974 {
28975 setFlag(ALU);
28976 setFlag(F32);
28977 } // Inst_VOP3__V_DIV_FIXUP_F32
28978
28979 Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
28980 {
28981 } // ~Inst_VOP3__V_DIV_FIXUP_F32
28982
28983 // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
28984 // s2.f = Numerator.
28985 void
28986 Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
28987 {
28988 Wavefront *wf = gpuDynInst->wavefront();
28989 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28990 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28991 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28992 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28993
28994 src0.readSrc();
28995 src1.readSrc();
28996 src2.readSrc();
28997
28998 if (instData.ABS & 0x1) {
28999 src0.absModifier();
29000 }
29001
29002 if (instData.ABS & 0x2) {
29003 src1.absModifier();
29004 }
29005
29006 if (instData.ABS & 0x4) {
29007 src2.absModifier();
29008 }
29009
29010 if (extData.NEG & 0x1) {
29011 src0.negModifier();
29012 }
29013
29014 if (extData.NEG & 0x2) {
29015 src1.negModifier();
29016 }
29017
29018 if (extData.NEG & 0x4) {
29019 src2.negModifier();
29020 }
29021
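// note: this model simplifies the fixup: NaN inputs and zero or
// infinite denominators are handled explicitly, and the quotient is
// otherwise recomputed as S2 / S1 rather than touched up from S0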
29022 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29023 if (wf->execMask(lane)) {
29024 if (std::fpclassify(src1[lane]) == FP_ZERO) {
29025 if (std::signbit(src1[lane])) {
29026 vdst[lane] = -INFINITY;
29027 } else {
29028 vdst[lane] = +INFINITY;
29029 }
29030 } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
29031 vdst[lane] = NAN;
29032 } else if (std::isinf(src1[lane])) {
29033 if (std::signbit(src1[lane]) != std::signbit(src2[lane])) {
29034 vdst[lane] = -0.0;
29035 } else {
29036 vdst[lane] = +0.0;
29037 }
29038 } else {
29039 vdst[lane] = src2[lane] / src1[lane];
29040 }
29041 }
29042 }
29043
29044 vdst.write();
29045 } // execute
29046 // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---
29047
29048 Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3 *iFmt)
29049 : Inst_VOP3(iFmt, "v_div_fixup_f64", false)
29050 {
29051 setFlag(ALU);
29052 setFlag(F64);
29053 } // Inst_VOP3__V_DIV_FIXUP_F64
29054
29055 Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
29056 {
29057 } // ~Inst_VOP3__V_DIV_FIXUP_F64
29058
29059 // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
29060 // s2.d = Numerator.
29061 void
29062 Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
29063 {
29064 Wavefront *wf = gpuDynInst->wavefront();
29065 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29066 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29067 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
29068 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29069
29070 src0.readSrc();
29071 src1.readSrc();
29072 src2.readSrc();
29073
29074 if (instData.ABS & 0x1) {
29075 src0.absModifier();
29076 }
29077
29078 if (instData.ABS & 0x2) {
29079 src1.absModifier();
29080 }
29081
29082 if (instData.ABS & 0x4) {
29083 src2.absModifier();
29084 }
29085
29086 if (extData.NEG & 0x1) {
29087 src0.negModifier();
29088 }
29089
29090 if (extData.NEG & 0x2) {
29091 src1.negModifier();
29092 }
29093
29094 if (extData.NEG & 0x4) {
29095 src2.negModifier();
29096 }
29097
29098 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29099 if (wf->execMask(lane)) {
29100 int sign_out = std::signbit(src1[lane])
29101 ^ std::signbit(src2[lane]);
29102 int exp1(0);
29103 int exp2(0);
29104 std::frexp(src1[lane], &exp1);
29105 std::frexp(src2[lane], &exp2);
29106
29107 if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
29108 vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
29109 } else if (std::fpclassify(src1[lane]) == FP_ZERO
29110 && std::fpclassify(src2[lane]) == FP_ZERO) {
29111 vdst[lane]
29112 = std::numeric_limits<VecElemF64>::signaling_NaN();
29113 } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
29114 vdst[lane]
29115 = std::numeric_limits<VecElemF64>::signaling_NaN();
29116 } else if (std::fpclassify(src1[lane]) == FP_ZERO
29117 || std::isinf(src2[lane])) {
29118 vdst[lane] = sign_out ? -INFINITY : +INFINITY;
29119 } else if (std::isinf(src1[lane])
29120 || std::fpclassify(src2[lane]) == FP_ZERO) {
29121 vdst[lane] = sign_out ? -0.0 : +0.0;
29122 } else if (exp2 - exp1 < -1075) {
29123 vdst[lane] = src0[lane];
29124 } else if (exp1 == 2047) {
29125 vdst[lane] = src0[lane];
29126 } else {
29127 vdst[lane] = sign_out ? -std::fabs(src0[lane])
29128 : std::fabs(src0[lane]);
29129 }
29130 }
29131 }
29132
29133 vdst.write();
29134 }
29135
29136 Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32(
29137 InFmt_VOP3_SDST_ENC *iFmt)
29138 : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f32")
29139 {
29140 setFlag(ALU);
29141 setFlag(WritesVCC);
29142 setFlag(F32);
29143 } // Inst_VOP3__V_DIV_SCALE_F32
29144
29145 Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32()
29146 {
29147 } // ~Inst_VOP3__V_DIV_SCALE_F32
29148
29149 // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f =
29150 // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a
29151 // numerator and denominator, this opcode will appropriately scale inputs
29152 // for division to avoid subnormal terms during Newton-Raphson correction.
29153 // This opcode produces a VCC flag for post-scaling of the quotient.
29154 void
29155 Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst)
29156 {
29157 Wavefront *wf = gpuDynInst->wavefront();
29158 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
29159 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
29160 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
29161 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29162 VecOperandF32 vdst(gpuDynInst, instData.VDST);
29163
29164 src0.readSrc();
29165 src1.readSrc();
29166 src2.readSrc();
29167
29168 if (extData.NEG & 0x1) {
29169 src0.negModifier();
29170 }
29171
29172 if (extData.NEG & 0x2) {
29173 src1.negModifier();
29174 }
29175
29176 if (extData.NEG & 0x4) {
29177 src2.negModifier();
29178 }
29179
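// note: operand scaling is not modeled for the F32 variant; the
// quotient passes through unmodified and VCC is cleared for all
// active lanes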
29180 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29181 if (wf->execMask(lane)) {
29182 vdst[lane] = src0[lane];
29183 vcc.setBit(lane, 0);
29184 }
29185 }
29186
29187 vcc.write();
29188 vdst.write();
29189 } // execute
29190 // --- Inst_VOP3__V_DIV_SCALE_F64 class methods ---
29191
29192 Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64(
29193 InFmt_VOP3_SDST_ENC *iFmt)
29194 : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f64")
29195 {
29196 setFlag(ALU);
29197 setFlag(WritesVCC);
29198 setFlag(F64);
29199 } // Inst_VOP3__V_DIV_SCALE_F64
29200
29201 Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64()
29202 {
29203 } // ~Inst_VOP3__V_DIV_SCALE_F64
29204
29205 // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d =
29206 // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a
29207 // numerator and denominator, this opcode will appropriately scale inputs
29208 // for division to avoid subnormal terms during Newton-Raphson correction.
29209 // This opcode produces a VCC flag for post-scaling of the quotient.
29210 void
29211 Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst)
29212 {
29213 Wavefront *wf = gpuDynInst->wavefront();
29214 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29215 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29216 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
29217 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29218 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29219
29220 src0.readSrc();
29221 src1.readSrc();
29222 src2.readSrc();
29223
29224 if (extData.NEG & 0x1) {
29225 src0.negModifier();
29226 }
29227
29228 if (extData.NEG & 0x2) {
29229 src1.negModifier();
29230 }
29231
29232 if (extData.NEG & 0x4) {
29233 src2.negModifier();
29234 }
29235
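// the ldexp(..., +/-128) calls apply the 2^128 scaling that keeps the
// Newton-Raphson iteration clear of the subnormal range; a set VCC
// bit tells the subsequent v_div_fmas to post-scale the quotient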
29236 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29237 if (wf->execMask(lane)) {
29238 int exp1(0);
29239 int exp2(0);
29240 std::frexp(src1[lane], &exp1);
29241 std::frexp(src2[lane], &exp2);
29242 vcc.setBit(lane, 0);
29243
29244 if (std::fpclassify(src1[lane]) == FP_ZERO
29245 || std::fpclassify(src2[lane]) == FP_ZERO) {
29246 vdst[lane] = NAN;
29247 } else if (exp2 - exp1 >= 768) {
29248 vcc.setBit(lane, 1);
29249 if (src0[lane] == src1[lane]) {
29250 vdst[lane] = std::ldexp(src0[lane], 128);
29251 }
29252 } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
29253 vdst[lane] = std::ldexp(src0[lane], 128);
29254 } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
29255 && std::fpclassify(src2[lane] / src1[lane])
29256 == FP_SUBNORMAL) {
29257 vcc.setBit(lane, 1);
29258 if (src0[lane] == src1[lane]) {
29259 vdst[lane] = std::ldexp(src0[lane], 128);
29260 }
29261 } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) {
29262 vdst[lane] = std::ldexp(src0[lane], -128);
29263 } else if (std::fpclassify(src2[lane] / src1[lane])
29264 == FP_SUBNORMAL) {
29265 vcc.setBit(lane, 1);
29266 if (src0[lane] == src2[lane]) {
29267 vdst[lane] = std::ldexp(src0[lane], 128);
29268 }
29269 } else if (exp2 <= 53) {
29270 vdst[lane] = std::ldexp(src0[lane], 128);
29271 }
29272 }
29273 }
29274
29275 vcc.write();
29276 vdst.write();
29277 }
29278
29279 Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3 *iFmt)
29280 : Inst_VOP3(iFmt, "v_div_fmas_f32", false)
29281 {
29282 setFlag(ALU);
29283 setFlag(ReadsVCC);
29284 setFlag(F32);
29285 setFlag(FMA);
29286 } // Inst_VOP3__V_DIV_FMAS_F32
29287
29288 Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32()
29289 {
29290 } // ~Inst_VOP3__V_DIV_FMAS_F32
29291
29292 // D.f = Special case divide FMA with scale and flags(s0.f = Quotient,
29293 // s1.f = Denominator, s2.f = Numerator)
29294 void
29295 Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst)
29296 {
29297 Wavefront *wf = gpuDynInst->wavefront();
29298 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
29299 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
29300 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
29301 VecOperandF32 vdst(gpuDynInst, instData.VDST);
29302
29303 src0.readSrc();
29304 src1.readSrc();
29305 src2.readSrc();
29306
29307 if (instData.ABS & 0x1) {
29308 src0.absModifier();
29309 }
29310
29311 if (instData.ABS & 0x2) {
29312 src1.absModifier();
29313 }
29314
29315 if (instData.ABS & 0x4) {
29316 src2.absModifier();
29317 }
29318
29319 if (extData.NEG & 0x1) {
29320 src0.negModifier();
29321 }
29322
29323 if (extData.NEG & 0x2) {
29324 src1.negModifier();
29325 }
29326
29327 if (extData.NEG & 0x4) {
29328 src2.negModifier();
29329 }
29330
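// note: unlike the F64 variant below, this model does not read VCC,
// so the VCC-controlled post-scale of the result is not applied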
29331 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29332 if (wf->execMask(lane)) {
29333 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
29334 }
29335 }
29336
29337 vdst.write();
29338 } // execute
29339 // --- Inst_VOP3__V_DIV_FMAS_F64 class methods ---
29340
29341 Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3 *iFmt)
29342 : Inst_VOP3(iFmt, "v_div_fmas_f64", false)
29343 {
29344 setFlag(ALU);
29345 setFlag(ReadsVCC);
29346 setFlag(F64);
29347 setFlag(FMA);
29348 } // Inst_VOP3__V_DIV_FMAS_F64
29349
29350 Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
29351 {
29352 } // ~Inst_VOP3__V_DIV_FMAS_F64
29353
29354 // D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
29355 // s1.d = Denominator, s2.d = Numerator)
29356 void
29357 Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
29358 {
29359 Wavefront *wf = gpuDynInst->wavefront();
29360 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29361 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29362 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
29363 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29364 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
29365
29366 src0.readSrc();
29367 src1.readSrc();
29368 src2.readSrc();
29369 vcc.read();
29370
29371 if (instData.ABS & 0x1) {
29372 src0.absModifier();
29373 }
29374
29375 if (instData.ABS & 0x2) {
29376 src1.absModifier();
29377 }
29378
29379 if (instData.ABS & 0x4) {
29380 src2.absModifier();
29381 }
29382
29383 if (extData.NEG & 0x1) {
29384 src0.negModifier();
29385 }
29386
29387 if (extData.NEG & 0x2) {
29388 src1.negModifier();
29389 }
29390
29391 if (extData.NEG & 0x4) {
29392 src2.negModifier();
29393 }
29394
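// lanes whose VCC bit is set (typically by a preceding v_div_scale)
// have their FMA result post-scaled by 2^64 to undo the earlier
// operand scaling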
29395 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29396 if (wf->execMask(lane)) {
29397 if (bits(vcc.rawData(), lane)) {
29398 vdst[lane] = std::ldexp(std::fma(src0[lane],
29399 src1[lane], src2[lane]), 64);
29400 } else {
29401 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
29402 }
29403 }
29404 }
29405
29406 vdst.write();
29407 }
29408
29409 Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3 *iFmt)
29410 : Inst_VOP3(iFmt, "v_msad_u8", false)
29411 {
29412 setFlag(ALU);
29413 } // Inst_VOP3__V_MSAD_U8
29414
29415 Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
29416 {
29417 } // ~Inst_VOP3__V_MSAD_U8
29418
29419 // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
29420 void
29421 Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
29422 {
29423 panicUnimplemented();
29424 }
29425
29426 Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3 *iFmt)
29427 : Inst_VOP3(iFmt, "v_qsad_pk_u16_u8", false)
29428 {
29429 setFlag(ALU);
29430 } // Inst_VOP3__V_QSAD_PK_U16_U8
29431
29432 Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
29433 {
29434 } // ~Inst_VOP3__V_QSAD_PK_U16_U8
29435
29436 // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
29437 // S1.u[31:0], S2.u[63:0])
29438 void
29439 Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
29440 {
29441 panicUnimplemented();
29442 }
29443
29444 Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
29445 InFmt_VOP3 *iFmt)
29446 : Inst_VOP3(iFmt, "v_mqsad_pk_u16_u8", false)
29447 {
29448 setFlag(ALU);
29449 } // Inst_VOP3__V_MQSAD_PK_U16_U8
29450
29451 Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
29452 {
29453 } // ~Inst_VOP3__V_MQSAD_PK_U16_U8
29454
29455 // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
29456 // S1.u[31:0], S2.u[63:0])
29457 void
29458 Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
29459 {
29460 panicUnimplemented();
29461 }
29462
29463 Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3 *iFmt)
29464 : Inst_VOP3(iFmt, "v_mqsad_u32_u8", false)
29465 {
29466 setFlag(ALU);
29467 } // Inst_VOP3__V_MQSAD_U32_U8
29468
29469 Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
29470 {
29471 } // ~Inst_VOP3__V_MQSAD_U32_U8
29472
29473 // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
29474 // S1.u[31:0], S2.u[127:0])
29475 void
29476 Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
29477 {
29478 panicUnimplemented();
29479 }
29480
29481 Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
29482 InFmt_VOP3_SDST_ENC *iFmt)
29483 : Inst_VOP3_SDST_ENC(iFmt, "v_mad_u64_u32")
29484 {
29485 setFlag(ALU);
29486 setFlag(WritesVCC);
29487 setFlag(MAD);
29488 } // Inst_VOP3__V_MAD_U64_U32
29489
29490 Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
29491 {
29492 } // ~Inst_VOP3__V_MAD_U64_U32
29493
29494 // {vcc_out, D.u64} = S0.u32 * S1.u32 + S2.u64.
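// muladd() is a helper (defined in inst_util.hh) that stores the
// full-width product-plus-addend in its first argument and returns the
// carry-out, which is recorded per lane in VCC.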
29495 void
29496 Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
29497 {
29498 Wavefront *wf = gpuDynInst->wavefront();
29499 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
29500 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
29501 ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
29502 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29503 VecOperandU64 vdst(gpuDynInst, instData.VDST);
29504
29505 src0.readSrc();
29506 src1.readSrc();
29507 src2.readSrc();
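// vdst is read first so that lanes masked off by EXEC retain their
// previous value when the destination is written back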
29508 vdst.read();
29509
29510 /**
29511 * input modifiers are supported by FP operations only
29512 */
29513 assert(!(extData.NEG & 0x1));
29514 assert(!(extData.NEG & 0x2));
29515 assert(!(extData.NEG & 0x4));
29516
29517 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29518 if (wf->execMask(lane)) {
29519 vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
29520 src2[lane]));
29521 }
29522 }
29523
29524 vcc.write();
29525 vdst.write();
29526 }
29527
29528 Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
29529 InFmt_VOP3_SDST_ENC *iFmt)
29530 : Inst_VOP3_SDST_ENC(iFmt, "v_mad_i64_i32")
29531 {
29532 setFlag(ALU);
29533 setFlag(WritesVCC);
29534 setFlag(MAD);
29535 } // Inst_VOP3__V_MAD_I64_I32
29536
29537 Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
29538 {
29539 } // ~Inst_VOP3__V_MAD_I64_I32
29540
29541 // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
29542 void
29543 Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst)
29544 {
29545 Wavefront *wf = gpuDynInst->wavefront();
29546 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
29547 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
29548 ConstVecOperandI64 src2(gpuDynInst, extData.SRC2);
29549 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29550 VecOperandI64 vdst(gpuDynInst, instData.VDST);
29551
29552 src0.readSrc();
29553 src1.readSrc();
src2.readSrc();
vdst.read();
29555
29556 /**
29557 * input modifiers are supported by FP operations only
29558 */
29559 assert(!(extData.NEG & 0x1));
29560 assert(!(extData.NEG & 0x2));
29561 assert(!(extData.NEG & 0x4));
29562
29563 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29564 if (wf->execMask(lane)) {
29565 vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
29566 src2[lane]));
29567 }
29568 }
29569
29570 vcc.write();
29571 vdst.write();
29572 }
29573
29574 Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3 *iFmt)
29575 : Inst_VOP3(iFmt, "v_mad_f16", false)
29576 {
29577 setFlag(ALU);
29578 setFlag(F16);
29579 setFlag(MAD);
29580 } // Inst_VOP3__V_MAD_F16
29581
29582 Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16()
29583 {
29584 } // ~Inst_VOP3__V_MAD_F16
29585
29586 // D.f16 = S0.f16 * S1.f16 + S2.f16.
29587 // Supports round mode, exception flags, saturation.
29588 void
29589 Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst)
29590 {
29591 panicUnimplemented();
29592 }
29593
29594 Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3 *iFmt)
29595 : Inst_VOP3(iFmt, "v_mad_u16", false)
29596 {
29597 setFlag(ALU);
29598 setFlag(MAD);
29599 } // Inst_VOP3__V_MAD_U16
29600
29601 Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16()
29602 {
29603 } // ~Inst_VOP3__V_MAD_U16
29604
29605 // D.u16 = S0.u16 * S1.u16 + S2.u16.
29606 // Supports saturation (unsigned 16-bit integer domain).
29607 void
29608 Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst)
29609 {
29610 Wavefront *wf = gpuDynInst->wavefront();
29611 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
29612 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
29613 ConstVecOperandU16 src2(gpuDynInst, extData.SRC2);
29614 VecOperandU16 vdst(gpuDynInst, instData.VDST);
29615
29616 src0.readSrc();
29617 src1.readSrc();
29618 src2.readSrc();
29619
29620 /**
29621 * input modifiers are supported by FP operations only
29622 */
29623 assert(!(instData.ABS & 0x1));
29624 assert(!(instData.ABS & 0x2));
29625 assert(!(instData.ABS & 0x4));
29626 assert(!(extData.NEG & 0x1));
29627 assert(!(extData.NEG & 0x2));
29628 assert(!(extData.NEG & 0x4));
29629
29630 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29631 if (wf->execMask(lane)) {
29632 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
29633 }
29634 }
29635
29636 vdst.write();
29637 }
29638
29639 Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3 *iFmt)
29640 : Inst_VOP3(iFmt, "v_mad_i16", false)
29641 {
29642 setFlag(ALU);
29643 setFlag(MAD);
29644 } // Inst_VOP3__V_MAD_I16
29645
29646 Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16()
29647 {
29648 } // ~Inst_VOP3__V_MAD_I16
29649
29650 // D.i16 = S0.i16 * S1.i16 + S2.i16.
29651 // Supports saturation (signed 16-bit integer domain).
29652 void
29653 Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst)
29654 {
29655 Wavefront *wf = gpuDynInst->wavefront();
29656 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
29657 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
29658 ConstVecOperandI16 src2(gpuDynInst, extData.SRC2);
29659 VecOperandI16 vdst(gpuDynInst, instData.VDST);
29660
29661 src0.readSrc();
29662 src1.readSrc();
29663 src2.readSrc();
29664
29665 /**
29666 * input modifiers are supported by FP operations only
29667 */
29668 assert(!(instData.ABS & 0x1));
29669 assert(!(instData.ABS & 0x2));
29670 assert(!(instData.ABS & 0x4));
29671 assert(!(extData.NEG & 0x1));
29672 assert(!(extData.NEG & 0x2));
29673 assert(!(extData.NEG & 0x4));
29674
29675 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29676 if (wf->execMask(lane)) {
29677 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
29678 }
29679 }
29680
29681 vdst.write();
29682 }
29683
29684 Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3 *iFmt)
29685 : Inst_VOP3(iFmt, "v_perm_b32", false)
29686 {
29687 setFlag(ALU);
29688 } // Inst_VOP3__V_PERM_B32
29689
29690 Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32()
29691 {
29692 } // ~Inst_VOP3__V_PERM_B32
29693
29694 // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]);
29695 // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]);
29696 // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]);
29697 // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]);
29698 // byte permute(byte in[8], byte sel) {
29699 // if(sel>=13) then return 0xff;
29700 // elsif(sel==12) then return 0x00;
29701 // elsif(sel==11) then return in[7][7] * 0xff;
29702 // elsif(sel==10) then return in[5][7] * 0xff;
29703 // elsif(sel==9) then return in[3][7] * 0xff;
29704 // elsif(sel==8) then return in[1][7] * 0xff;
29705 // else return in[sel];
29706 // }
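// in[] indexes the byte-wise concatenation {S0, S1}, with in[0] the
// least significant byte of S1. Example (illustrative values): with
// S2 = 0x07060504 every selector picks one of bytes 7..4, i.e. the
// bytes of S0, so D equals S0.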
29707 void
29708 Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
29709 {
29710 Wavefront *wf = gpuDynInst->wavefront();
29711 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
29712 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
29713 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
29714 VecOperandU32 vdst(gpuDynInst, instData.VDST);
29715
29716 src0.readSrc();
29717 src1.readSrc();
29718 src2.readSrc();
29719
29720 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29721 if (wf->execMask(lane)) {
29722 VecElemU64 selector = (VecElemU64)src0[lane];
29723 selector = (selector << 32) | (VecElemU64)src1[lane];
29724 vdst[lane] = 0;
29725
29726 DPRINTF(GCN3, "Executing v_perm_b32 src_0 0x%08x, src_1 "
29727 "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
29728 src1[lane], src2[lane], vdst[lane]);
29729 DPRINTF(GCN3, "Selector: 0x%08x \n", selector);
29730
29731 for (int i = 0; i < 4 ; ++i) {
29732 VecElemU32 permuted_val = permute(selector, 0xFF
29733 & ((VecElemU32)src2[lane] >> (8 * i)));
29734 vdst[lane] |= (permuted_val << (8 * i));
29735 }
29736
29737 DPRINTF(GCN3, "v_perm result: 0x%08x\n", vdst[lane]);
29738 }
29739 }
29740
29741 vdst.write();
29742 }
29743
29744 Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3 *iFmt)
29745 : Inst_VOP3(iFmt, "v_fma_f16", false)
29746 {
29747 setFlag(ALU);
29748 setFlag(F16);
29749 setFlag(FMA);
29750 } // Inst_VOP3__V_FMA_F16
29751
29752 Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
29753 {
29754 } // ~Inst_VOP3__V_FMA_F16
29755
29756 // D.f16 = S0.f16 * S1.f16 + S2.f16.
29757 // Fused half precision multiply add.
29758 void
29759 Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
29760 {
29761 panicUnimplemented();
29762 }
29763
29764 Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3 *iFmt)
29765 : Inst_VOP3(iFmt, "v_div_fixup_f16", false)
29766 {
29767 setFlag(ALU);
29768 setFlag(F16);
29769 } // Inst_VOP3__V_DIV_FIXUP_F16
29770
29771 Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
29772 {
29773 } // ~Inst_VOP3__V_DIV_FIXUP_F16
29774
29775 // sign_out = sign(S1.f16)^sign(S2.f16);
29776 // if (S2.f16 == NAN)
29777 // D.f16 = Quiet(S2.f16);
29778 // else if (S1.f16 == NAN)
29779 // D.f16 = Quiet(S1.f16);
29780 // else if (S1.f16 == S2.f16 == 0)
29781 // # 0/0
29782 // D.f16 = pele_nan(0xfe00);
29783 // else if (abs(S1.f16) == abs(S2.f16) == +-INF)
29784 // # inf/inf
29785 // D.f16 = pele_nan(0xfe00);
29786 // else if (S1.f16 == 0 || abs(S2.f16) == +-INF)
29787 // # x/0, or inf/y
29788 // D.f16 = sign_out ? -INF : INF;
29789 // else if (abs(S1.f16) == +-INF || S2.f16 == 0)
29790 // # x/inf, 0/y
29791 // D.f16 = sign_out ? -0 : 0;
29792 // else if ((exp(S2.f16) - exp(S1.f16)) < -150)
29793 // D.f16 = sign_out ? -underflow : underflow;
29794 // else if (exp(S1.f16) == 255)
29795 // D.f16 = sign_out ? -overflow : overflow;
29796 // else
29797 // D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
29798 // Half precision division fixup.
29799 // S0 = Quotient, S1 = Denominator, S2 = Numerator.
29800 // Given a numerator, denominator, and quotient from a divide, this opcode
29801 // will detect and apply special case numerics, touching up the quotient if
29802 // necessary. This opcode also generates invalid, denorm and divide by
29803 // zero exceptions caused by the division.
29804 void
29805 Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
29806 {
29807 panicUnimplemented();
29808 }
29809
29810 Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
29811 InFmt_VOP3 *iFmt)
29812 : Inst_VOP3(iFmt, "v_cvt_pkaccum_u8_f32", false)
29813 {
29814 setFlag(ALU);
29815 setFlag(F32);
29816 } // Inst_VOP3__V_CVT_PKACCUM_U8_F32
29817
29818 Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
29819 {
29820 } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32
29821
29822 // byte = S1.u[1:0]; bit = byte * 8;
29823 // D.u[bit + 7:bit] = flt32_to_uint8(S0.f);
29824 // Pack converted value of S0.f into byte S1 of the destination.
29825 // SQ translates to V_CVT_PK_U8_F32.
29826 // Note: this opcode uses src_c to pass destination in as a source.
29827 void
29828 Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
29829 {
29830 panicUnimplemented();
29831 }
29832
29833 Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3 *iFmt)
29834 : Inst_VOP3(iFmt, "v_interp_p1_f32", false)
29835 {
29836 setFlag(ALU);
29837 setFlag(F32);
29838 } // Inst_VOP3__V_INTERP_P1_F32
29839
29840 Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
29841 {
29842 } // ~Inst_VOP3__V_INTERP_P1_F32
29843
29844 // D.f = P10 * S.f + P0;
29845 void
29846 Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
29847 {
29848 panicUnimplemented();
29849 }
29850
29851 Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3 *iFmt)
29852 : Inst_VOP3(iFmt, "v_interp_p2_f32", false)
29853 {
29854 setFlag(ALU);
29855 setFlag(F32);
29856 } // Inst_VOP3__V_INTERP_P2_F32
29857
29858 Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
29859 {
29860 } // ~Inst_VOP3__V_INTERP_P2_F32
29861
29862 // D.f = P20 * S.f + D.f;
29863 void
29864 Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
29865 {
29866 panicUnimplemented();
29867 }
29868
29869 Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3 *iFmt)
29870 : Inst_VOP3(iFmt, "v_interp_mov_f32", false)
29871 {
29872 setFlag(ALU);
29873 setFlag(F32);
29874 } // Inst_VOP3__V_INTERP_MOV_F32
29875
29876 Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
29877 {
29878 } // ~Inst_VOP3__V_INTERP_MOV_F32
29879
29880 // D.f = {P10,P20,P0}[S.u]; parameter load.
29881 void
29882 Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
29883 {
29884 panicUnimplemented();
29885 }
29886
29887 Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
29888 InFmt_VOP3 *iFmt)
29889 : Inst_VOP3(iFmt, "v_interp_p1ll_f16", false)
29890 {
29891 setFlag(ALU);
29892 setFlag(F16);
29893 } // Inst_VOP3__V_INTERP_P1LL_F16
29894
29895 Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
29896 {
29897 } // ~Inst_VOP3__V_INTERP_P1LL_F16
29898
29899 // D.f32 = P10.f16 * S0.f32 + P0.f16.
29900 void
29901 Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
29902 {
29903 panicUnimplemented();
29904 }
29905
29906 Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
29907 InFmt_VOP3 *iFmt)
29908 : Inst_VOP3(iFmt, "v_interp_p1lv_f16", false)
29909 {
29910 setFlag(ALU);
29911 setFlag(F16);
29912 } // Inst_VOP3__V_INTERP_P1LV_F16
29913
29914 Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
29915 {
29916 } // ~Inst_VOP3__V_INTERP_P1LV_F16
29917
29918 void
29919 Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
29920 {
29921 panicUnimplemented();
29922 }
29923
29924 Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3 *iFmt)
29925 : Inst_VOP3(iFmt, "v_interp_p2_f16", false)
29926 {
29927 setFlag(ALU);
29928 setFlag(F16);
29929 } // Inst_VOP3__V_INTERP_P2_F16
29930
29931 Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
29932 {
29933 } // ~Inst_VOP3__V_INTERP_P2_F16
29934
29935 // D.f16 = P20.f16 * S0.f32 + S2.f32.
29936 void
29937 Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
29938 {
29939 panicUnimplemented();
29940 }
29941
29942 Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3 *iFmt)
29943 : Inst_VOP3(iFmt, "v_add_f64", false)
29944 {
29945 setFlag(ALU);
29946 setFlag(F64);
29947 } // Inst_VOP3__V_ADD_F64
29948
29949 Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
29950 {
29951 } // ~Inst_VOP3__V_ADD_F64
29952
29953 // D.d = S0.d + S1.d.
29954 void
29955 Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst)
29956 {
29957 Wavefront *wf = gpuDynInst->wavefront();
29958 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29959 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29960 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29961
29962 src0.readSrc();
29963 src1.readSrc();
29964
29965 if (instData.ABS & 0x1) {
29966 src0.absModifier();
29967 }
29968
29969 if (instData.ABS & 0x2) {
29970 src1.absModifier();
29971 }
29972
29973 if (extData.NEG & 0x1) {
29974 src0.negModifier();
29975 }
29976
29977 if (extData.NEG & 0x2) {
29978 src1.negModifier();
29979 }
29980
29981 /**
29982 * input modifiers are supported by FP operations only
29983 */
29984 assert(!(instData.ABS & 0x4));
29985 assert(!(extData.NEG & 0x4));
29986
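// special cases follow IEEE 754: NaNs propagate, opposite-signed
// infinities give NaN, and zero/subnormal inputs are flushed to zero,
// producing -0.0 only when both inputs carry the sign bit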
29987 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29988 if (wf->execMask(lane)) {
29989 if (std::isnan(src0[lane]) ||
29990 std::isnan(src1[lane]) ) {
29991 vdst[lane] = NAN;
29992 } else if (std::isinf(src0[lane]) &&
29993 std::isinf(src1[lane])) {
29994 if (std::signbit(src0[lane]) !=
29995 std::signbit(src1[lane])) {
29996 vdst[lane] = NAN;
29997 } else {
29998 vdst[lane] = src0[lane];
29999 }
30000 } else if (std::isinf(src0[lane])) {
30001 vdst[lane] = src0[lane];
30002 } else if (std::isinf(src1[lane])) {
30003 vdst[lane] = src1[lane];
30004 } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30005 std::fpclassify(src0[lane]) == FP_ZERO) {
30006 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30007 std::fpclassify(src1[lane]) == FP_ZERO) {
30008 if (std::signbit(src0[lane]) &&
30009 std::signbit(src1[lane])) {
30010 vdst[lane] = -0.0;
30011 } else {
30012 vdst[lane] = 0.0;
30013 }
30014 } else {
30015 vdst[lane] = src1[lane];
30016 }
30017 } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30018 std::fpclassify(src1[lane]) == FP_ZERO) {
30019 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30020 std::fpclassify(src0[lane]) == FP_ZERO) {
30021 if (std::signbit(src0[lane]) &&
30022 std::signbit(src1[lane])) {
30023 vdst[lane] = -0.0;
30024 } else {
30025 vdst[lane] = 0.0;
30026 }
30027 } else {
30028 vdst[lane] = src0[lane];
30029 }
30030 } else {
30031 vdst[lane] = src0[lane] + src1[lane];
30032 }
30033 }
30034 }
30035
30036 vdst.write();
30037 }
30038
30039 Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3 *iFmt)
30040 : Inst_VOP3(iFmt, "v_mul_f64", false)
30041 {
30042 setFlag(ALU);
30043 setFlag(F64);
30044 } // Inst_VOP3__V_MUL_F64
30045
30046 Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
30047 {
30048 } // ~Inst_VOP3__V_MUL_F64
30049
30050 // D.d = S0.d * S1.d.
30051 void
30052 Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
30053 {
30054 Wavefront *wf = gpuDynInst->wavefront();
30055 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
30056 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
30057 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30058
30059 src0.readSrc();
30060 src1.readSrc();
30061
30062 if (instData.ABS & 0x1) {
30063 src0.absModifier();
30064 }
30065
30066 if (instData.ABS & 0x2) {
30067 src1.absModifier();
30068 }
30069
30070 if (extData.NEG & 0x1) {
30071 src0.negModifier();
30072 }
30073
30074 if (extData.NEG & 0x2) {
30075 src1.negModifier();
30076 }
30077
30078 /**
30079 * input modifiers are supported by FP operations only
30080 */
30081 assert(!(instData.ABS & 0x4));
30082 assert(!(extData.NEG & 0x4));
30083
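// special cases: NaNs propagate, 0 * inf gives NaN, and zero or
// subnormal operands are flushed to a signed zero whose sign is the
// XOR of the two input signs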
30084 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30085 if (wf->execMask(lane)) {
30086 if (std::isnan(src0[lane]) ||
30087 std::isnan(src1[lane])) {
30088 vdst[lane] = NAN;
30089 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30090 std::fpclassify(src0[lane]) == FP_ZERO) &&
30091 !std::signbit(src0[lane])) {
30092 if (std::isinf(src1[lane])) {
30093 vdst[lane] = NAN;
30094 } else if (!std::signbit(src1[lane])) {
30095 vdst[lane] = +0.0;
30096 } else {
30097 vdst[lane] = -0.0;
30098 }
30099 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30100 std::fpclassify(src0[lane]) == FP_ZERO) &&
30101 std::signbit(src0[lane])) {
30102 if (std::isinf(src1[lane])) {
30103 vdst[lane] = NAN;
30104 } else if (std::signbit(src1[lane])) {
30105 vdst[lane] = +0.0;
30106 } else {
30107 vdst[lane] = -0.0;
30108 }
30109 } else if (std::isinf(src0[lane]) &&
30110 !std::signbit(src0[lane])) {
30111 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30112 std::fpclassify(src1[lane]) == FP_ZERO) {
30113 vdst[lane] = NAN;
30114 } else if (!std::signbit(src1[lane])) {
30115 vdst[lane] = +INFINITY;
30116 } else {
30117 vdst[lane] = -INFINITY;
30118 }
30119 } else if (std::isinf(src0[lane]) &&
30120 std::signbit(src0[lane])) {
30121 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30122 std::fpclassify(src1[lane]) == FP_ZERO) {
30123 vdst[lane] = NAN;
30124 } else if (std::signbit(src1[lane])) {
30125 vdst[lane] = +INFINITY;
30126 } else {
30127 vdst[lane] = -INFINITY;
30128 }
30129 } else {
30130 vdst[lane] = src0[lane] * src1[lane];
30131 }
30132 }
30133 }
30134
30135 vdst.write();
30136 }
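
// Worked examples of the cases above: 0.0 * INF = NAN; (-0.0) * 3.0 =
// -0.0; (-INF) * (-2.0) = +INF. The sign of a zero or infinity result
// follows the XOR of the operand signs, and subnormal inputs are
// treated as (signed) zeros.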
30137
30138 Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3 *iFmt)
30139 : Inst_VOP3(iFmt, "v_min_f64", false)
30140 {
30141 setFlag(ALU);
30142 setFlag(F64);
30143 } // Inst_VOP3__V_MIN_F64
30144
30145 Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
30146 {
30147 } // ~Inst_VOP3__V_MIN_F64
30148
30149 // D.d = min(S0.d, S1.d).
30150 void
30151 Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
30152 {
30153 Wavefront *wf = gpuDynInst->wavefront();
30154 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
30155 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
30156 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30157
30158 src0.readSrc();
30159 src1.readSrc();
30160
30161 if (instData.ABS & 0x1) {
30162 src0.absModifier();
30163 }
30164
30165 if (instData.ABS & 0x2) {
30166 src1.absModifier();
30167 }
30168
30169 if (extData.NEG & 0x1) {
30170 src0.negModifier();
30171 }
30172
30173 if (extData.NEG & 0x2) {
30174 src1.negModifier();
30175 }
30176
30177 /**
30178 * input modifiers are supported by FP operations only
30179 */
30180 assert(!(instData.ABS & 0x4));
30181 assert(!(extData.NEG & 0x4));
30182
30183 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30184 if (wf->execMask(lane)) {
30185 vdst[lane] = std::fmin(src0[lane], src1[lane]);
30186 }
30187 }
30188
30189 vdst.write();
30190 }
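
// Note: std::fmin (and std::fmax in v_max_f64 below) return the
// non-NaN operand when exactly one input is NaN, so unlike the explicit
// NaN checks in the f64 add and mul above, a single NaN input here does
// not force a NaN result.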
30191
30192 Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3 *iFmt)
30193 : Inst_VOP3(iFmt, "v_max_f64", false)
30194 {
30195 setFlag(ALU);
30196 setFlag(F64);
30197 } // Inst_VOP3__V_MAX_F64
30198
30199 Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
30200 {
30201 } // ~Inst_VOP3__V_MAX_F64
30202
30203 // D.d = max(S0.d, S1.d).
30204 void
30205 Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
30206 {
30207 Wavefront *wf = gpuDynInst->wavefront();
30208 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
30209 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
30210 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30211
30212 src0.readSrc();
30213 src1.readSrc();
30214
30215 if (instData.ABS & 0x1) {
30216 src0.absModifier();
30217 }
30218
30219 if (instData.ABS & 0x2) {
30220 src1.absModifier();
30221 }
30222
30223 if (extData.NEG & 0x1) {
30224 src0.negModifier();
30225 }
30226
30227 if (extData.NEG & 0x2) {
30228 src1.negModifier();
30229 }
30230
30231 /**
30232 * input modifiers are supported by FP operations only
30233 */
30234 assert(!(instData.ABS & 0x4));
30235 assert(!(extData.NEG & 0x4));
30236
30237 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30238 if (wf->execMask(lane)) {
30239 vdst[lane] = std::fmax(src0[lane], src1[lane]);
30240 }
30241 }
30242
30243 vdst.write();
30244 }
30245
30246 Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3 *iFmt)
30247 : Inst_VOP3(iFmt, "v_ldexp_f64", false)
30248 {
30249 setFlag(ALU);
30250 setFlag(F64);
30251 } // Inst_VOP3__V_LDEXP_F64
30252
30253 Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
30254 {
30255 } // ~Inst_VOP3__V_LDEXP_F64
30256
// D.d = S0.d * 2^S1.i[31:0] (ldexp).
30258 void
30259 Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst)
30260 {
30261 Wavefront *wf = gpuDynInst->wavefront();
30262 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
30264 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30265
30266 src0.readSrc();
30267 src1.readSrc();
30268
30269 if (instData.ABS & 0x1) {
30270 src0.absModifier();
30271 }
30272
30273 if (extData.NEG & 0x1) {
30274 src0.negModifier();
30275 }
30276
30277 /**
30278 * input modifiers are supported by FP operations only
30279 */
30280 assert(!(instData.ABS & 0x2));
30281 assert(!(instData.ABS & 0x4));
30282 assert(!(extData.NEG & 0x2));
30283 assert(!(extData.NEG & 0x4));
30284
30285 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30286 if (wf->execMask(lane)) {
30287 if (std::isnan(src0[lane]) || std::isinf(src0[lane])) {
30288 vdst[lane] = src0[lane];
30289 } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
30290 || std::fpclassify(src0[lane]) == FP_ZERO) {
30291 if (std::signbit(src0[lane])) {
30292 vdst[lane] = -0.0;
30293 } else {
30294 vdst[lane] = +0.0;
30295 }
30296 } else {
30297 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
30298 }
30299 }
30300 }
30301
30302 vdst.write();
30303 }
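
// For example, ldexp(0.75, 4) = 0.75 * 2^4 = 12.0, and negative
// exponents scale down: ldexp(1.0, -2) = 0.25.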
30304
30305 Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3 *iFmt)
30306 : Inst_VOP3(iFmt, "v_mul_lo_u32", false)
30307 {
30308 setFlag(ALU);
30309 } // Inst_VOP3__V_MUL_LO_U32
30310
30311 Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32()
30312 {
30313 } // ~Inst_VOP3__V_MUL_LO_U32
30314
30315 // D.u = S0.u * S1.u.
30316 void
30317 Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
30318 {
30319 Wavefront *wf = gpuDynInst->wavefront();
30320 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30321 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30322 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30323
30324 src0.readSrc();
30325 src1.readSrc();
30326
30327 /**
30328 * input modifiers are supported by FP operations only
30329 */
30330 assert(!(instData.ABS & 0x1));
30331 assert(!(instData.ABS & 0x2));
30332 assert(!(instData.ABS & 0x4));
30333 assert(!(extData.NEG & 0x1));
30334 assert(!(extData.NEG & 0x2));
30335 assert(!(extData.NEG & 0x4));
30336
30337 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30338 if (wf->execMask(lane)) {
30339 VecElemI64 s0 = (VecElemI64)src0[lane];
30340 VecElemI64 s1 = (VecElemI64)src1[lane];
30341 vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
30342 }
30343 }
30344
30345 vdst.write();
30346 }
30347
30348 Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3 *iFmt)
30349 : Inst_VOP3(iFmt, "v_mul_hi_u32", false)
30350 {
30351 setFlag(ALU);
30352 } // Inst_VOP3__V_MUL_HI_U32
30353
30354 Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
30355 {
30356 } // ~Inst_VOP3__V_MUL_HI_U32
30357
30358 // D.u = (S0.u * S1.u) >> 32.
30359 void
30360 Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
30361 {
30362 Wavefront *wf = gpuDynInst->wavefront();
30363 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30364 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30365 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30366
30367 src0.readSrc();
30368 src1.readSrc();
30369
30370 /**
30371 * input modifiers are supported by FP operations only
30372 */
30373 assert(!(instData.ABS & 0x1));
30374 assert(!(instData.ABS & 0x2));
30375 assert(!(instData.ABS & 0x4));
30376 assert(!(extData.NEG & 0x1));
30377 assert(!(extData.NEG & 0x2));
30378 assert(!(extData.NEG & 0x4));
30379
30380 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30381 if (wf->execMask(lane)) {
30382 VecElemI64 s0 = (VecElemI64)src0[lane];
30383 VecElemI64 s1 = (VecElemI64)src1[lane];
30384 vdst[lane]
30385 = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
30386 }
30387 }
30388
30389 vdst.write();
30390 }
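
// v_mul_lo_u32 and v_mul_hi_u32 together form a full 32 x 32 -> 64
// unsigned multiply: full = ((U64)hi << 32) | lo. For example,
// 0x80000000 * 0x80000000 gives lo = 0x0 and hi = 0x40000000.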
30391
30392 Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3 *iFmt)
30393 : Inst_VOP3(iFmt, "v_mul_hi_i32", false)
30394 {
30395 setFlag(ALU);
30396 } // Inst_VOP3__V_MUL_HI_I32
30397
30398 Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
30399 {
30400 } // ~Inst_VOP3__V_MUL_HI_I32
30401
30402 // D.i = (S0.i * S1.i) >> 32.
30403 void
30404 Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
30405 {
30406 Wavefront *wf = gpuDynInst->wavefront();
30407 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
30408 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
30409 VecOperandI32 vdst(gpuDynInst, instData.VDST);
30410
30411 src0.readSrc();
30412 src1.readSrc();
30413
30414 /**
30415 * input modifiers are supported by FP operations only
30416 */
30417 assert(!(instData.ABS & 0x1));
30418 assert(!(instData.ABS & 0x2));
30419 assert(!(instData.ABS & 0x4));
30420 assert(!(extData.NEG & 0x1));
30421 assert(!(extData.NEG & 0x2));
30422 assert(!(extData.NEG & 0x4));
30423
30424 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30425 if (wf->execMask(lane)) {
30426 VecElemI64 s0 = (VecElemI64)src0[lane];
30427 VecElemI64 s1 = (VecElemI64)src1[lane];
30428 vdst[lane]
30429 = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
30430 }
30431 }
30432
30433 vdst.write();
30434 }
30435
30436 Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3 *iFmt)
30437 : Inst_VOP3(iFmt, "v_ldexp_f32", false)
30438 {
30439 setFlag(ALU);
30440 setFlag(F32);
30441 } // Inst_VOP3__V_LDEXP_F32
30442
30443 Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
30444 {
30445 } // ~Inst_VOP3__V_LDEXP_F32
30446
// D.f = S0.f * 2^S1.i (ldexp).
30448 void
30449 Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
30450 {
30451 Wavefront *wf = gpuDynInst->wavefront();
30452 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
30453 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
30454 VecOperandF32 vdst(gpuDynInst, instData.VDST);
30455
30456 src0.readSrc();
30457 src1.readSrc();
30458
30459 /**
30460 * input modifiers are supported by FP operations only
30461 */
30462 assert(!(instData.ABS & 0x2));
30463 assert(!(instData.ABS & 0x4));
30464 assert(!(extData.NEG & 0x2));
30465 assert(!(extData.NEG & 0x4));
30466
30467 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30468 if (wf->execMask(lane)) {
30469 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
30470 }
30471 }
30472
30473 vdst.write();
30474 }
30475
30476 Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3 *iFmt)
30477 : Inst_VOP3(iFmt, "v_readlane_b32", true)
30478 {
30479 setFlag(ALU);
30480 setFlag(IgnoreExec);
30481 } // Inst_VOP3__V_READLANE_B32
30482
30483 Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
30484 {
30485 } // ~Inst_VOP3__V_READLANE_B32
30486
30487 // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
30488 // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
30489 // Input and output modifiers not supported; this is an untyped operation.
30490 void
30491 Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
30492 {
30493 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30494 ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
30495 ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
30496
30497 src0.readSrc();
30498 src1.read();
30499
30500 /**
30501 * input modifiers are supported by FP operations only
30502 */
30503 assert(!(instData.ABS & 0x1));
30504 assert(!(instData.ABS & 0x2));
30505 assert(!(instData.ABS & 0x4));
30506 assert(!(extData.NEG & 0x1));
30507 assert(!(extData.NEG & 0x2));
30508 assert(!(extData.NEG & 0x4));
30509
30510 sdst = src0[src1.rawData() & 0x3f];
30511
30512 sdst.write();
30513 }
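
// Typical use (illustrative asm; the lane select is taken mod 64 by the
// & 0x3f above):
//   v_readlane_b32 s0, v1, 5   // s0 = lane 5's copy of v1, EXEC ignored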
30514
30515 Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3 *iFmt)
30516 : Inst_VOP3(iFmt, "v_writelane_b32", false)
30517 {
30518 setFlag(ALU);
30519 setFlag(IgnoreExec);
30520 } // Inst_VOP3__V_WRITELANE_B32
30521
30522 Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
30523 {
30524 } // ~Inst_VOP3__V_WRITELANE_B32
30525
30526 // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
30527 // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
30528 // exec mask. Input and output modifiers not supported; this is an untyped
30529 // operation.
30530 void
30531 Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
30532 {
30533 ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
30534 ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
30535 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30536
30537 src0.read();
30538 src1.read();
30539 vdst.read();
30540
30541 /**
30542 * input modifiers are supported by FP operations only
30543 */
30544 assert(!(instData.ABS & 0x1));
30545 assert(!(instData.ABS & 0x2));
30546 assert(!(instData.ABS & 0x4));
30547 assert(!(extData.NEG & 0x1));
30548 assert(!(extData.NEG & 0x2));
30549 assert(!(extData.NEG & 0x4));
30550
30551 vdst[src1.rawData() & 0x3f] = src0.rawData();
30552
30553 vdst.write();
30554 }
30555
30556 Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3 *iFmt)
30557 : Inst_VOP3(iFmt, "v_bcnt_u32_b32", false)
30558 {
30559 setFlag(ALU);
30560 } // Inst_VOP3__V_BCNT_U32_B32
30561
30562 Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
30563 {
30564 } // ~Inst_VOP3__V_BCNT_U32_B32
30565
30566 // D.u = CountOneBits(S0.u) + S1.u. Bit count.
30567 void
30568 Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
30569 {
30570 Wavefront *wf = gpuDynInst->wavefront();
30571 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30572 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30573 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30574
30575 src0.readSrc();
30576 src1.readSrc();
30577
30578 /**
30579 * input modifiers are supported by FP operations only
30580 */
30581 assert(!(instData.ABS & 0x1));
30582 assert(!(instData.ABS & 0x2));
30583 assert(!(instData.ABS & 0x4));
30584 assert(!(extData.NEG & 0x1));
30585 assert(!(extData.NEG & 0x2));
30586 assert(!(extData.NEG & 0x4));
30587
30588 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30589 if (wf->execMask(lane)) {
30590 vdst[lane] = popCount(src0[lane]) + src1[lane];
30591 }
30592 }
30593
30594 vdst.write();
30595 }
30596
30597 Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
30598 InFmt_VOP3 *iFmt)
30599 : Inst_VOP3(iFmt, "v_mbcnt_lo_u32_b32", false)
30600 {
30601 setFlag(ALU);
30602 } // Inst_VOP3__V_MBCNT_LO_U32_B32
30603
30604 Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
30605 {
30606 } // ~Inst_VOP3__V_MBCNT_LO_U32_B32
30607
// ThreadMask = (1 << ThreadPosition) - 1;
// D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u.
// Masked bit count, ThreadPosition is the position of this thread in the
// wavefront (in 0..63).
30610 void
30611 Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
30612 {
30613 Wavefront *wf = gpuDynInst->wavefront();
30614 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30615 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30616 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30617 uint64_t threadMask = 0;
30618
30619 src0.readSrc();
30620 src1.readSrc();
30621
30622 /**
30623 * input modifiers are supported by FP operations only
30624 */
30625 assert(!(instData.ABS & 0x1));
30626 assert(!(instData.ABS & 0x2));
30627 assert(!(instData.ABS & 0x4));
30628 assert(!(extData.NEG & 0x1));
30629 assert(!(extData.NEG & 0x2));
30630 assert(!(extData.NEG & 0x4));
30631
30632 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30633 if (wf->execMask(lane)) {
30634 threadMask = ((1LL << lane) - 1LL);
30635 vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
30636 src1[lane];
30637 }
30638 }
30639
30640 vdst.write();
30641 } // execute
30642 // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---
30643
30644 Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
30645 InFmt_VOP3 *iFmt)
30646 : Inst_VOP3(iFmt, "v_mbcnt_hi_u32_b32", false)
30647 {
30648 setFlag(ALU);
30649 } // Inst_VOP3__V_MBCNT_HI_U32_B32
30650
30651 Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32()
30652 {
30653 } // ~Inst_VOP3__V_MBCNT_HI_U32_B32
30654
30655 // ThreadMask = (1 << ThreadPosition) - 1;
30656 // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u.
30657 // Masked bit count, ThreadPosition is the position of this thread in the
30658 // wavefront (in 0..63).
30659 void
30660 Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
30661 {
30662 Wavefront *wf = gpuDynInst->wavefront();
30663 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30664 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30665 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30666 uint64_t threadMask = 0;
30667
30668 src0.readSrc();
30669 src1.readSrc();
30670
30671 /**
30672 * input modifiers are supported by FP operations only
30673 */
30674 assert(!(instData.ABS & 0x1));
30675 assert(!(instData.ABS & 0x2));
30676 assert(!(instData.ABS & 0x4));
30677 assert(!(extData.NEG & 0x1));
30678 assert(!(extData.NEG & 0x2));
30679 assert(!(extData.NEG & 0x4));
30680
30681 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30682 if (wf->execMask(lane)) {
30683 threadMask = ((1LL << lane) - 1LL);
30684 vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
30685 src1[lane];
30686 }
30687 }
30688
30689 vdst.write();
30690 } // execute
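
// The usual pairing (illustrative asm) computes each lane's rank among
// the active lanes:
//   v_mbcnt_lo_u32_b32 v0, exec_lo, 0
//   v_mbcnt_hi_u32_b32 v0, exec_hi, v0
// after which v0 holds, per lane, the number of active lanes below it.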
30691 // --- Inst_VOP3__V_LSHLREV_B64 class methods ---
30692
30693 Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
30694 : Inst_VOP3(iFmt, "v_lshlrev_b64", false)
30695 {
30696 setFlag(ALU);
30697 } // Inst_VOP3__V_LSHLREV_B64
30698
30699 Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64()
30700 {
30701 } // ~Inst_VOP3__V_LSHLREV_B64
30702
30703 // D.u64 = S1.u64 << S0.u[5:0].
30704 void
30705 Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
30706 {
30707 Wavefront *wf = gpuDynInst->wavefront();
30708 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30709 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
30710 VecOperandU64 vdst(gpuDynInst, instData.VDST);
30711
30712 src0.readSrc();
30713 src1.readSrc();
30714
30715 /**
30716 * input modifiers are supported by FP operations only
30717 */
30718 assert(!(instData.ABS & 0x1));
30719 assert(!(instData.ABS & 0x2));
30720 assert(!(instData.ABS & 0x4));
30721 assert(!(extData.NEG & 0x1));
30722 assert(!(extData.NEG & 0x2));
30723 assert(!(extData.NEG & 0x4));
30724
30725 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30726 if (wf->execMask(lane)) {
30727 vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
30728 }
30729 }
30730
30731 vdst.write();
30732 }
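
// The "rev" suffix means the operands are reversed relative to the
// non-rev shifts: the shift amount is src0 and the value shifted is
// src1. This mirrors the VOP2 encodings, where only src0 can hold an
// inline constant, e.g. (illustrative asm):
//   v_lshlrev_b64 v[0:1], 2, v[2:3]   // v[0:1] = v[2:3] << 2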
30733
30734 Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3 *iFmt)
30735 : Inst_VOP3(iFmt, "v_lshrrev_b64", false)
30736 {
30737 setFlag(ALU);
30738 } // Inst_VOP3__V_LSHRREV_B64
30739
30740 Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
30741 {
30742 } // ~Inst_VOP3__V_LSHRREV_B64
30743
30744 // D.u64 = S1.u64 >> S0.u[5:0].
30745 // The vacated bits are set to zero.
30746 void
30747 Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
30748 {
30749 Wavefront *wf = gpuDynInst->wavefront();
30750 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30751 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
30752 VecOperandU64 vdst(gpuDynInst, instData.VDST);
30753
30754 src0.readSrc();
30755 src1.readSrc();
30756
30757 /**
30758 * input modifiers are supported by FP operations only
30759 */
30760 assert(!(instData.ABS & 0x1));
30761 assert(!(instData.ABS & 0x2));
30762 assert(!(instData.ABS & 0x4));
30763 assert(!(extData.NEG & 0x1));
30764 assert(!(extData.NEG & 0x2));
30765 assert(!(extData.NEG & 0x4));
30766
30767 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30768 if (wf->execMask(lane)) {
30769 vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
30770 }
30771 }
30772
30773 vdst.write();
30774 }
30775
30776 Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3 *iFmt)
30777 : Inst_VOP3(iFmt, "v_ashrrev_i64", false)
30778 {
30779 setFlag(ALU);
30780 } // Inst_VOP3__V_ASHRREV_I64
30781
30782 Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
30783 {
30784 } // ~Inst_VOP3__V_ASHRREV_I64
30785
30786 // D.u64 = signext(S1.u64) >> S0.u[5:0].
30787 // The vacated bits are set to the sign bit of the input value.
30788 void
30789 Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
30790 {
30791 Wavefront *wf = gpuDynInst->wavefront();
30792 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30793 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
30794 VecOperandU64 vdst(gpuDynInst, instData.VDST);
30795
30796 src0.readSrc();
30797 src1.readSrc();
30798
30799 /**
30800 * input modifiers are supported by FP operations only
30801 */
30802 assert(!(instData.ABS & 0x1));
30803 assert(!(instData.ABS & 0x2));
30804 assert(!(instData.ABS & 0x4));
30805 assert(!(extData.NEG & 0x1));
30806 assert(!(extData.NEG & 0x2));
30807 assert(!(extData.NEG & 0x4));
30808
30809 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30810 if (wf->execMask(lane)) {
30811 vdst[lane]
30812 = src1[lane] >> bits(src0[lane], 5, 0);
30813 }
30814 }
30815
30816 vdst.write();
30817 }
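
// Note: the arithmetic (sign-filling) behavior comes from shifting the
// signed I64 source; the result is simply stored back through the U64
// destination view.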
30818
30819 Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3 *iFmt)
30820 : Inst_VOP3(iFmt, "v_trig_preop_f64", false)
30821 {
30822 setFlag(ALU);
30823 setFlag(F64);
30824 } // Inst_VOP3__V_TRIG_PREOP_F64
30825
30826 Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
30827 {
30828 } // ~Inst_VOP3__V_TRIG_PREOP_F64
30829
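// Rough semantics (unimplemented below): D.d = a segment of 2/PI
// selected by S1.u, as used for large-argument trigonometric range
// reduction.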
30830 void
30831 Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
30832 {
30833 panicUnimplemented();
30834 }
30835
30836 Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3 *iFmt)
30837 : Inst_VOP3(iFmt, "v_bfm_b32", false)
30838 {
30839 setFlag(ALU);
30840 } // Inst_VOP3__V_BFM_B32
30841
30842 Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
30843 {
30844 } // ~Inst_VOP3__V_BFM_B32
30845
30846 // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0];
30847 void
30848 Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
30849 {
30850 Wavefront *wf = gpuDynInst->wavefront();
30851 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30852 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30853 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30854
30855 src0.readSrc();
30856 src1.readSrc();
30857
30858 /**
30859 * input modifiers are supported by FP operations only
30860 */
30861 assert(!(instData.ABS & 0x1));
30862 assert(!(instData.ABS & 0x2));
30863 assert(!(instData.ABS & 0x4));
30864 assert(!(extData.NEG & 0x1));
30865 assert(!(extData.NEG & 0x2));
30866 assert(!(extData.NEG & 0x4));
30867
30868 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30869 if (wf->execMask(lane)) {
30870 vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
30871 << bits(src1[lane], 4, 0);
30872 }
30873 }
30874
30875 vdst.write();
30876 }
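
// For example, S0 = 4, S1 = 8 gives ((1 << 4) - 1) << 8 = 0x00000f00:
// a 4-bit field mask positioned at bit 8.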
30877
30878 Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
30879 InFmt_VOP3 *iFmt)
30880 : Inst_VOP3(iFmt, "v_cvt_pknorm_i16_f32", false)
30881 {
30882 setFlag(ALU);
30883 setFlag(F32);
30884 } // Inst_VOP3__V_CVT_PKNORM_I16_F32
30885
30886 Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
30887 {
30888 } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32
30889
30890 // D = {(snorm)S1.f, (snorm)S0.f}.
30891 void
30892 Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
30893 {
30894 panicUnimplemented();
30895 }
30896
30897 Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
30898 InFmt_VOP3 *iFmt)
30899 : Inst_VOP3(iFmt, "v_cvt_pknorm_u16_f32", false)
30900 {
30901 setFlag(ALU);
30902 setFlag(F32);
30903 } // Inst_VOP3__V_CVT_PKNORM_U16_F32
30904
30905 Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
30906 {
30907 } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32
30908
30909 // D = {(unorm)S1.f, (unorm)S0.f}.
30910 void
30911 Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
30912 {
30913 panicUnimplemented();
30914 }
30915
30916 Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
30917 InFmt_VOP3 *iFmt)
30918 : Inst_VOP3(iFmt, "v_cvt_pkrtz_f16_f32", false)
30919 {
30920 setFlag(ALU);
30921 setFlag(F32);
30922 } // Inst_VOP3__V_CVT_PKRTZ_F16_F32
30923
30924 Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
30925 {
30926 } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32
30927
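// D = {f32_to_f16(S1.f), f32_to_f16(S0.f)}, converted with
// round-toward-zero regardless of the current rounding mode.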
30928 void
30929 Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
30930 {
30931 panicUnimplemented();
30932 }
30933
30934 Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3 *iFmt)
30935 : Inst_VOP3(iFmt, "v_cvt_pk_u16_u32", false)
30936 {
30937 setFlag(ALU);
30938 } // Inst_VOP3__V_CVT_PK_U16_U32
30939
30940 Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
30941 {
30942 } // ~Inst_VOP3__V_CVT_PK_U16_U32
30943
30944 // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
30945 void
30946 Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
30947 {
30948 panicUnimplemented();
30949 }
30950
30951 Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3 *iFmt)
30952 : Inst_VOP3(iFmt, "v_cvt_pk_i16_i32", false)
30953 {
30954 setFlag(ALU);
30955 } // Inst_VOP3__V_CVT_PK_I16_I32
30956
30957 Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
30958 {
30959 } // ~Inst_VOP3__V_CVT_PK_I16_I32
30960
30961 // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
30962 void
30963 Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
30964 {
30965 panicUnimplemented();
30966 }
30967
30968 Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
30969 : Inst_DS(iFmt, "ds_add_u32")
30970 {
30971 } // Inst_DS__DS_ADD_U32
30972
30973 Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
30974 {
30975 } // ~Inst_DS__DS_ADD_U32
30976
30977 // tmp = MEM[ADDR];
30978 // MEM[ADDR] += DATA;
30979 // RETURN_DATA = tmp.
30980 void
30981 Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
30982 {
30983 panicUnimplemented();
30984 }
30985
30986 Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
30987 : Inst_DS(iFmt, "ds_sub_u32")
30988 {
30989 } // Inst_DS__DS_SUB_U32
30990
30991 Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
30992 {
30993 } // ~Inst_DS__DS_SUB_U32
30994
30995 // tmp = MEM[ADDR];
30996 // MEM[ADDR] -= DATA;
30997 // RETURN_DATA = tmp.
30998 void
30999 Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
31000 {
31001 panicUnimplemented();
31002 }
31003
31004 Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
31005 : Inst_DS(iFmt, "ds_rsub_u32")
31006 {
31007 } // Inst_DS__DS_RSUB_U32
31008
31009 Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
31010 {
31011 } // ~Inst_DS__DS_RSUB_U32
31012
31013 // tmp = MEM[ADDR];
31014 // MEM[ADDR] = DATA - MEM[ADDR];
31015 // RETURN_DATA = tmp.
31016 // Subtraction with reversed operands.
31017 void
31018 Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
31019 {
31020 panicUnimplemented();
31021 }
31022
31023 Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
31024 : Inst_DS(iFmt, "ds_inc_u32")
31025 {
31026 } // Inst_DS__DS_INC_U32
31027
31028 Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
31029 {
31030 } // ~Inst_DS__DS_INC_U32
31031
31032 // tmp = MEM[ADDR];
31033 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
31034 // RETURN_DATA = tmp.
31035 void
31036 Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
31037 {
31038 panicUnimplemented();
31039 }
31040
31041 Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
31042 : Inst_DS(iFmt, "ds_dec_u32")
31043 {
31044 } // Inst_DS__DS_DEC_U32
31045
31046 Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
31047 {
31048 } // ~Inst_DS__DS_DEC_U32
31049
31050 // tmp = MEM[ADDR];
31051 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
31052 // (unsigned compare); RETURN_DATA = tmp.
31053 void
31054 Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
31055 {
31056 panicUnimplemented();
31057 }
31058
31059 Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
31060 : Inst_DS(iFmt, "ds_min_i32")
31061 {
31062 } // Inst_DS__DS_MIN_I32
31063
31064 Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
31065 {
31066 } // ~Inst_DS__DS_MIN_I32
31067
31068 // tmp = MEM[ADDR];
31069 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
31070 // RETURN_DATA = tmp.
31071 void
31072 Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
31073 {
31074 panicUnimplemented();
31075 }
31076
31077 Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
31078 : Inst_DS(iFmt, "ds_max_i32")
31079 {
31080 } // Inst_DS__DS_MAX_I32
31081
31082 Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
31083 {
31084 } // ~Inst_DS__DS_MAX_I32
31085
31086 // tmp = MEM[ADDR];
31087 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
31088 // RETURN_DATA = tmp.
31089 void
31090 Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
31091 {
31092 panicUnimplemented();
31093 }
31094
31095 Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
31096 : Inst_DS(iFmt, "ds_min_u32")
31097 {
31098 } // Inst_DS__DS_MIN_U32
31099
31100 Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
31101 {
31102 } // ~Inst_DS__DS_MIN_U32
31103
31104 // tmp = MEM[ADDR];
31105 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
31106 // RETURN_DATA = tmp.
31107 void
31108 Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
31109 {
31110 panicUnimplemented();
31111 }
31112
31113 Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
31114 : Inst_DS(iFmt, "ds_max_u32")
31115 {
31116 } // Inst_DS__DS_MAX_U32
31117
31118 Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
31119 {
31120 } // ~Inst_DS__DS_MAX_U32
31121
31122 // tmp = MEM[ADDR];
31123 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
31124 // RETURN_DATA = tmp.
31125 void
31126 Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
31127 {
31128 panicUnimplemented();
31129 }
31130
31131 Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
31132 : Inst_DS(iFmt, "ds_and_b32")
31133 {
31134 } // Inst_DS__DS_AND_B32
31135
31136 Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
31137 {
31138 } // ~Inst_DS__DS_AND_B32
31139
31140 // tmp = MEM[ADDR];
31141 // MEM[ADDR] &= DATA;
31142 // RETURN_DATA = tmp.
31143 void
31144 Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
31145 {
31146 panicUnimplemented();
31147 }
31148
31149 Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
31150 : Inst_DS(iFmt, "ds_or_b32")
31151 {
31152 } // Inst_DS__DS_OR_B32
31153
31154 Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
31155 {
31156 } // ~Inst_DS__DS_OR_B32
31157
31158 // tmp = MEM[ADDR];
31159 // MEM[ADDR] |= DATA;
31160 // RETURN_DATA = tmp.
31161 void
31162 Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
31163 {
31164 panicUnimplemented();
31165 }
31166
31167 Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
31168 : Inst_DS(iFmt, "ds_xor_b32")
31169 {
31170 } // Inst_DS__DS_XOR_B32
31171
31172 Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
31173 {
31174 } // ~Inst_DS__DS_XOR_B32
31175
31176 // tmp = MEM[ADDR];
31177 // MEM[ADDR] ^= DATA;
31178 // RETURN_DATA = tmp.
31179 void
31180 Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
31181 {
31182 panicUnimplemented();
31183 }
31184
31185 Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
31186 : Inst_DS(iFmt, "ds_mskor_b32")
31187 {
31188 } // Inst_DS__DS_MSKOR_B32
31189
31190 Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
31191 {
31192 } // ~Inst_DS__DS_MSKOR_B32
31193
31194 // tmp = MEM[ADDR];
// MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
31196 // RETURN_DATA = tmp.
31197 void
31198 Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
31199 {
31200 panicUnimplemented();
31201 }
31202
31203 Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
31204 : Inst_DS(iFmt, "ds_write_b32")
31205 {
31206 setFlag(MemoryRef);
31207 setFlag(Store);
31208 } // Inst_DS__DS_WRITE_B32
31209
31210 Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
31211 {
31212 } // ~Inst_DS__DS_WRITE_B32
31213
31214 // MEM[ADDR] = DATA.
31215 // Write dword.
31216 void
31217 Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
31218 {
31219 Wavefront *wf = gpuDynInst->wavefront();
31220 gpuDynInst->execUnitId = wf->execUnitId;
31221 gpuDynInst->exec_mask = wf->execMask();
31222 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31223 gpuDynInst->latency.set(
31224 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31225 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31226 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
31227
31228 addr.read();
31229 data.read();
31230
31231 calcAddr(gpuDynInst, addr);
31232
31233 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31234 if (wf->execMask(lane)) {
31235 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
31236 = data[lane];
31237 }
31238 }
31239
31240 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31241
31242 wf->wrLmReqsInPipe--;
31243 wf->outstandingReqsWrLm++;
31244 wf->outstandingReqs++;
31245 wf->validateRequestCounters();
31246 }
31247
31248 void
31249 Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
31250 {
31251 Addr offset0 = instData.OFFSET0;
31252 Addr offset1 = instData.OFFSET1;
31253 Addr offset = (offset1 << 8) | offset0;
31254
31255 initMemWrite<VecElemU32>(gpuDynInst, offset);
31256 } // initiateAcc
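
// OFFSET0 and OFFSET1 are each 8 bits wide and concatenate into a
// 16-bit unsigned byte offset, e.g. OFFSET0 = 0x34 and OFFSET1 = 0x12
// give offset = 0x1234.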
31257
31258 void
31259 Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
31260 {
31261 } // completeAcc
31262
31263 Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
31264 : Inst_DS(iFmt, "ds_write2_b32")
31265 {
31266 setFlag(MemoryRef);
31267 setFlag(Store);
31268 } // Inst_DS__DS_WRITE2_B32
31269
31270 Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
31271 {
31272 } // ~Inst_DS__DS_WRITE2_B32
31273
31274 // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
31275 // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
31276 // Write 2 dwords.
31277 void
31278 Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
31279 {
31280 Wavefront *wf = gpuDynInst->wavefront();
31281 gpuDynInst->execUnitId = wf->execUnitId;
31282 gpuDynInst->exec_mask = wf->execMask();
31283 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31284 gpuDynInst->latency.set(
31285 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31286 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31287 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
31288 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
31289
31290 addr.read();
31291 data0.read();
31292 data1.read();
31293
31294 calcAddr(gpuDynInst, addr);
31295
31296 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31297 if (wf->execMask(lane)) {
31298 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
31299 = data0[lane];
31300 (reinterpret_cast<VecElemU32*>(
31301 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
31302 }
31303 }
31304
31305 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31306
31307 wf->wrLmReqsInPipe--;
31308 wf->outstandingReqsWrLm++;
31309 wf->outstandingReqs++;
31310 wf->validateRequestCounters();
31311 }
31312
31313 void
31314 Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
31315 {
31316 Addr offset0 = instData.OFFSET0 * 4;
31317 Addr offset1 = instData.OFFSET1 * 4;
31318
31319 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
31320 }
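
// For the write2 forms each 8-bit offset is in dword units, so
// OFFSET0 = 0 and OFFSET1 = 1 store the two dwords at byte offsets 0
// and 4 (i.e., adjacent dwords).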
31321
31322 void
31323 Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
31324 {
31325 }
31326
31327 Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
31328 : Inst_DS(iFmt, "ds_write2st64_b32")
31329 {
31330 setFlag(MemoryRef);
31331 setFlag(Store);
31332 } // Inst_DS__DS_WRITE2ST64_B32
31333
31334 Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
31335 {
31336 } // ~Inst_DS__DS_WRITE2ST64_B32
31337
31338 // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
// MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2.
31340 // Write 2 dwords.
31341 void
31342 Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
31343 {
31344 Wavefront *wf = gpuDynInst->wavefront();
31345 gpuDynInst->execUnitId = wf->execUnitId;
31346 gpuDynInst->exec_mask = wf->execMask();
31347 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31348 gpuDynInst->latency.set(
31349 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31350 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31351 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
31352 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
31353
31354 addr.read();
31355 data0.read();
31356 data1.read();
31357
31358 calcAddr(gpuDynInst, addr);
31359
31360 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31361 if (wf->execMask(lane)) {
31362 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
31363 = data0[lane];
31364 (reinterpret_cast<VecElemU32*>(
31365 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
31366 }
31367 }
31368
31369 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31370
31371 wf->wrLmReqsInPipe--;
31372 wf->outstandingReqsWrLm++;
31373 wf->outstandingReqs++;
31374 wf->validateRequestCounters();
31375 } // execute
31376
31377 void
31378 Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
31379 {
31380 Addr offset0 = instData.OFFSET0 * 4 * 64;
31381 Addr offset1 = instData.OFFSET1 * 4 * 64;
31382
31383 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
31384 }
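
// The st64 variant strides in units of 64 dwords (256 bytes), so
// OFFSET1 = 1 places the second dword at byte offset 256.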
31385
31386 void
31387 Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
31388 {
31389 }
31390 // --- Inst_DS__DS_CMPST_B32 class methods ---
31391
31392 Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
31393 : Inst_DS(iFmt, "ds_cmpst_b32")
31394 {
31395 } // Inst_DS__DS_CMPST_B32
31396
31397 Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
31398 {
31399 } // ~Inst_DS__DS_CMPST_B32
31400
31401 // tmp = MEM[ADDR];
31402 // src = DATA2;
31403 // cmp = DATA;
31404 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31405 // RETURN_DATA[0] = tmp.
31406 // Compare and store.
31407 void
31408 Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst)
31409 {
31410 panicUnimplemented();
31411 }
31412
31413 Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt)
31414 : Inst_DS(iFmt, "ds_cmpst_f32")
31415 {
31416 setFlag(F32);
31417 } // Inst_DS__DS_CMPST_F32
31418
31419 Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32()
31420 {
31421 } // ~Inst_DS__DS_CMPST_F32
31422
31423 // tmp = MEM[ADDR];
31424 // src = DATA2;
31425 // cmp = DATA;
31426 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31427 // RETURN_DATA[0] = tmp.
31428 void
31429 Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst)
31430 {
31431 panicUnimplemented();
31432 }
31433
31434 Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt)
31435 : Inst_DS(iFmt, "ds_min_f32")
31436 {
31437 setFlag(F32);
31438 } // Inst_DS__DS_MIN_F32
31439
31440 Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32()
31441 {
31442 } // ~Inst_DS__DS_MIN_F32
31443
31444 // tmp = MEM[ADDR];
31445 // src = DATA;
31446 // cmp = DATA2;
31447 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
31448 void
31449 Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
31450 {
31451 panicUnimplemented();
31452 }
31453
31454 Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt)
31455 : Inst_DS(iFmt, "ds_max_f32")
31456 {
31457 setFlag(F32);
31458 } // Inst_DS__DS_MAX_F32
31459
31460 Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32()
31461 {
31462 } // ~Inst_DS__DS_MAX_F32
31463
31464 // tmp = MEM[ADDR];
31465 // src = DATA;
31466 // cmp = DATA2;
31467 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
31468 void
31469 Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
31470 {
31471 panicUnimplemented();
31472 }
31473
31474 Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
31475 : Inst_DS(iFmt, "ds_nop")
31476 {
31477 setFlag(Nop);
31478 } // Inst_DS__DS_NOP
31479
31480 Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
31481 {
31482 } // ~Inst_DS__DS_NOP
31483
31484 // Do nothing.
31485 void
31486 Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
31487 {
31488 }
31489
31490 Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt)
31491 : Inst_DS(iFmt, "ds_add_f32")
31492 {
31493 setFlag(F32);
31494 } // Inst_DS__DS_ADD_F32
31495
31496 Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
31497 {
31498 } // ~Inst_DS__DS_ADD_F32
31499
31500 // tmp = MEM[ADDR];
31501 // MEM[ADDR] += DATA;
31502 // RETURN_DATA = tmp.
31503 void
31504 Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
31505 {
31506 panicUnimplemented();
31507 }
31508
31509 Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
31510 : Inst_DS(iFmt, "ds_write_b8")
31511 {
31512 setFlag(MemoryRef);
31513 setFlag(Store);
31514 } // Inst_DS__DS_WRITE_B8
31515
31516 Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
31517 {
31518 } // ~Inst_DS__DS_WRITE_B8
31519
31520 // MEM[ADDR] = DATA[7:0].
31521 void
31522 Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
31523 {
31524 Wavefront *wf = gpuDynInst->wavefront();
31525 gpuDynInst->execUnitId = wf->execUnitId;
31526 gpuDynInst->exec_mask = wf->execMask();
31527 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31528 gpuDynInst->latency.set(
31529 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31530 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31531 ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
31532
31533 addr.read();
31534 data.read();
31535
31536 calcAddr(gpuDynInst, addr);
31537
31538 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31539 if (wf->execMask(lane)) {
31540 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
31541 = data[lane];
31542 }
31543 }
31544
31545 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31546
31547 wf->wrLmReqsInPipe--;
31548 wf->outstandingReqsWrLm++;
31549 wf->outstandingReqs++;
31550 wf->validateRequestCounters();
31551 } // execute
31552
31553 void
31554 Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
31555 {
31556 Addr offset0 = instData.OFFSET0;
31557 Addr offset1 = instData.OFFSET1;
31558 Addr offset = (offset1 << 8) | offset0;
31559
31560 initMemWrite<VecElemU8>(gpuDynInst, offset);
31561 } // initiateAcc
31562
31563 void
31564 Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
31565 {
31566 } // completeAcc
31567 // --- Inst_DS__DS_WRITE_B16 class methods ---
31568
31569 Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
31570 : Inst_DS(iFmt, "ds_write_b16")
31571 {
31572 setFlag(MemoryRef);
31573 setFlag(Store);
31574 } // Inst_DS__DS_WRITE_B16
31575
31576 Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16()
31577 {
31578 } // ~Inst_DS__DS_WRITE_B16
31579
31580 // MEM[ADDR] = DATA[15:0]
31581 void
31582 Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst)
31583 {
31584 Wavefront *wf = gpuDynInst->wavefront();
31585 gpuDynInst->execUnitId = wf->execUnitId;
31586 gpuDynInst->exec_mask = wf->execMask();
31587 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31588 gpuDynInst->latency.set(
31589 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31590 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31591 ConstVecOperandU16 data(gpuDynInst, extData.DATA0);
31592
31593 addr.read();
31594 data.read();
31595
31596 calcAddr(gpuDynInst, addr);
31597
31598 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31599 if (wf->execMask(lane)) {
31600 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
31601 = data[lane];
31602 }
31603 }
31604
31605 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31606
31607 wf->wrLmReqsInPipe--;
31608 wf->outstandingReqsWrLm++;
31609 wf->outstandingReqs++;
31610 wf->validateRequestCounters();
31611 } // execute
31612
31613 void
31614 Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst)
31615 {
31616 Addr offset0 = instData.OFFSET0;
31617 Addr offset1 = instData.OFFSET1;
31618 Addr offset = (offset1 << 8) | offset0;
31619
31620 initMemWrite<VecElemU16>(gpuDynInst, offset);
31621 } // initiateAcc
31622
31623 void
31624 Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst)
31625 {
31626 } // completeAcc
31627 // --- Inst_DS__DS_ADD_RTN_U32 class methods ---
31628
31629 Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
31630 : Inst_DS(iFmt, "ds_add_rtn_u32")
31631 {
31632 } // Inst_DS__DS_ADD_RTN_U32
31633
31634 Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
31635 {
31636 } // ~Inst_DS__DS_ADD_RTN_U32
31637
31638 // tmp = MEM[ADDR];
31639 // MEM[ADDR] += DATA;
31640 // RETURN_DATA = tmp.
31641 void
31642 Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31643 {
31644 panicUnimplemented();
31645 }
31646
31647 Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt)
31648 : Inst_DS(iFmt, "ds_sub_rtn_u32")
31649 {
31650 } // Inst_DS__DS_SUB_RTN_U32
31651
31652 Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32()
31653 {
31654 } // ~Inst_DS__DS_SUB_RTN_U32
31655
31656 // tmp = MEM[ADDR];
31657 // MEM[ADDR] -= DATA;
31658 // RETURN_DATA = tmp.
31659 void
31660 Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31661 {
31662 panicUnimplemented();
31663 }
31664
31665 Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
31666 : Inst_DS(iFmt, "ds_rsub_rtn_u32")
31667 {
31668 } // Inst_DS__DS_RSUB_RTN_U32
31669
31670 Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
31671 {
31672 } // ~Inst_DS__DS_RSUB_RTN_U32
31673
31674 // tmp = MEM[ADDR];
31675 // MEM[ADDR] = DATA - MEM[ADDR];
31676 // RETURN_DATA = tmp.
31677 void
31678 Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31679 {
31680 panicUnimplemented();
31681 }
31682
31683 Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
31684 : Inst_DS(iFmt, "ds_inc_rtn_u32")
31685 {
31686 } // Inst_DS__DS_INC_RTN_U32
31687
31688 Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
31689 {
31690 } // ~Inst_DS__DS_INC_RTN_U32
31691
31692 // tmp = MEM[ADDR];
31693 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
31694 // RETURN_DATA = tmp.
31695 void
31696 Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31697 {
31698 panicUnimplemented();
31699 }
31700
31701 Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
31702 : Inst_DS(iFmt, "ds_dec_rtn_u32")
31703 {
31704 } // Inst_DS__DS_DEC_RTN_U32
31705
31706 Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
31707 {
31708 } // ~Inst_DS__DS_DEC_RTN_U32
31709
31710 // tmp = MEM[ADDR];
31711 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
31712 // (unsigned compare); RETURN_DATA = tmp.
31713 void
31714 Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31715 {
31716 panicUnimplemented();
31717 }
31718
31719 Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
31720 : Inst_DS(iFmt, "ds_min_rtn_i32")
31721 {
31722 } // Inst_DS__DS_MIN_RTN_I32
31723
31724 Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
31725 {
31726 } // ~Inst_DS__DS_MIN_RTN_I32
31727
31728 // tmp = MEM[ADDR];
31729 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
31730 // RETURN_DATA = tmp.
31731 void
31732 Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
31733 {
31734 panicUnimplemented();
31735 }
31736
31737 Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
31738 : Inst_DS(iFmt, "ds_max_rtn_i32")
31739 {
31740 } // Inst_DS__DS_MAX_RTN_I32
31741
31742 Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
31743 {
31744 } // ~Inst_DS__DS_MAX_RTN_I32
31745
31746 // tmp = MEM[ADDR];
31747 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
31748 // RETURN_DATA = tmp.
31749 void
31750 Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
31751 {
31752 panicUnimplemented();
31753 }
31754
31755 Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
31756 : Inst_DS(iFmt, "ds_min_rtn_u32")
31757 {
31758 } // Inst_DS__DS_MIN_RTN_U32
31759
31760 Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
31761 {
31762 } // ~Inst_DS__DS_MIN_RTN_U32
31763
31764 // tmp = MEM[ADDR];
31765 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
31766 // RETURN_DATA = tmp.
31767 void
31768 Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31769 {
31770 panicUnimplemented();
31771 }
31772
31773 Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
31774 : Inst_DS(iFmt, "ds_max_rtn_u32")
31775 {
31776 } // Inst_DS__DS_MAX_RTN_U32
31777
31778 Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
31779 {
31780 } // ~Inst_DS__DS_MAX_RTN_U32
31781
31782 // tmp = MEM[ADDR];
31783 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
31784 // RETURN_DATA = tmp.
31785 void
31786 Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31787 {
31788 panicUnimplemented();
31789 }
31790
31791 Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
31792 : Inst_DS(iFmt, "ds_and_rtn_b32")
31793 {
31794 } // Inst_DS__DS_AND_RTN_B32
31795
31796 Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
31797 {
31798 } // ~Inst_DS__DS_AND_RTN_B32
31799
31800 // tmp = MEM[ADDR];
31801 // MEM[ADDR] &= DATA;
31802 // RETURN_DATA = tmp.
31803 void
31804 Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31805 {
31806 panicUnimplemented();
31807 }
31808
31809 Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
31810 : Inst_DS(iFmt, "ds_or_rtn_b32")
31811 {
31812 } // Inst_DS__DS_OR_RTN_B32
31813
31814 Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
31815 {
31816 } // ~Inst_DS__DS_OR_RTN_B32
31817
31818 // tmp = MEM[ADDR];
31819 // MEM[ADDR] |= DATA;
31820 // RETURN_DATA = tmp.
31821 void
31822 Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31823 {
31824 panicUnimplemented();
31825 }
31826
31827 Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
31828 : Inst_DS(iFmt, "ds_xor_rtn_b32")
31829 {
31830 } // Inst_DS__DS_XOR_RTN_B32
31831
31832 Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
31833 {
31834 } // ~Inst_DS__DS_XOR_RTN_B32
31835
31836 // tmp = MEM[ADDR];
31837 // MEM[ADDR] ^= DATA;
31838 // RETURN_DATA = tmp.
31839 void
31840 Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31841 {
31842 panicUnimplemented();
31843 }
31844
31845 Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
31846 : Inst_DS(iFmt, "ds_mskor_rtn_b32")
31847 {
31848 } // Inst_DS__DS_MSKOR_RTN_B32
31849
31850 Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
31851 {
31852 } // ~Inst_DS__DS_MSKOR_RTN_B32
31853
31854 // tmp = MEM[ADDR];
// MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
31856 // RETURN_DATA = tmp.
31857 void
31858 Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31859 {
31860 panicUnimplemented();
31861 }
31862
31863 Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
31864 : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
31865 {
31866 } // Inst_DS__DS_WRXCHG_RTN_B32
31867
31868 Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
31869 {
31870 } // ~Inst_DS__DS_WRXCHG_RTN_B32
31871
31872 // tmp = MEM[ADDR];
31873 // MEM[ADDR] = DATA;
31874 // RETURN_DATA = tmp.
31875 // Write-exchange operation.
31876 void
31877 Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31878 {
31879 panicUnimplemented();
31880 }
31881
31882 Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
31883 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
31884 {
31885 } // Inst_DS__DS_WRXCHG2_RTN_B32
31886
31887 Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
31888 {
31889 } // ~Inst_DS__DS_WRXCHG2_RTN_B32
31890
31891 // Write-exchange 2 separate dwords.
31892 void
31893 Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31894 {
31895 panicUnimplemented();
31896 }
31897
31898 Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
31899 InFmt_DS *iFmt)
31900 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
31901 {
31902 } // Inst_DS__DS_WRXCHG2ST64_RTN_B32
31903
31904 Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
31905 {
31906 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32
31907
31908 // Write-exchange 2 separate dwords with a stride of 64 dwords.
31909 void
31910 Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31911 {
31912 panicUnimplemented();
31913 }
31914
31915 Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
31916 : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
31917 {
31918 } // Inst_DS__DS_CMPST_RTN_B32
31919
31920 Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
31921 {
31922 } // ~Inst_DS__DS_CMPST_RTN_B32
31923
31924 // tmp = MEM[ADDR];
31925 // src = DATA2;
31926 // cmp = DATA;
31927 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31928 // RETURN_DATA[0] = tmp.
31929 // Compare and store.
31930 void
31931 Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31932 {
31933 panicUnimplemented();
31934 }
31935
31936 Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
31937 : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
31938 {
31939 setFlag(F32);
31940 } // Inst_DS__DS_CMPST_RTN_F32
31941
31942 Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
31943 {
31944 } // ~Inst_DS__DS_CMPST_RTN_F32
31945
31946 // tmp = MEM[ADDR];
31947 // src = DATA2;
31948 // cmp = DATA;
31949 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31950 // RETURN_DATA[0] = tmp.
31951 void
31952 Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
31953 {
31954 panicUnimplemented();
31955 }
31956
31957 Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
31958 : Inst_DS(iFmt, "ds_min_rtn_f32")
31959 {
31960 setFlag(F32);
31961 } // Inst_DS__DS_MIN_RTN_F32
31962
31963 Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
31964 {
31965 } // ~Inst_DS__DS_MIN_RTN_F32
31966
31967 // tmp = MEM[ADDR];
31968 // src = DATA;
31969 // cmp = DATA2;
31970 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
31971 void
31972 Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
31973 {
31974 panicUnimplemented();
31975 }
31976
31977 Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
31978 : Inst_DS(iFmt, "ds_max_rtn_f32")
31979 {
31980 setFlag(F32);
31981 } // Inst_DS__DS_MAX_RTN_F32
31982
31983 Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
31984 {
31985 } // ~Inst_DS__DS_MAX_RTN_F32
31986
31987 // tmp = MEM[ADDR];
31988 // src = DATA;
31989 // cmp = DATA2;
31990 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
31991 void
31992 Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
31993 {
31994 panicUnimplemented();
31995 }
31996
31997 Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
31998 : Inst_DS(iFmt, "ds_wrap_rtn_b32")
31999 {
32000 } // Inst_DS__DS_WRAP_RTN_B32
32001
32002 Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
32003 {
32004 } // ~Inst_DS__DS_WRAP_RTN_B32
32005
32006 // tmp = MEM[ADDR];
32007 // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
32008 // RETURN_DATA = tmp.
32009 void
32010 Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
32011 {
32012 panicUnimplemented();
32013 }
32014
32015 Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
32016 : Inst_DS(iFmt, "ds_add_rtn_f32")
32017 {
32018 setFlag(F32);
32019 } // Inst_DS__DS_ADD_RTN_F32
32020
32021 Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
32022 {
32023 } // ~Inst_DS__DS_ADD_RTN_F32
32024
32025 // tmp = MEM[ADDR];
32026 // MEM[ADDR] += DATA;
32027 // RETURN_DATA = tmp.
32028 void
32029 Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
32030 {
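// Left as a no-op: unlike the other unimplemented DS atomics, this
// opcode does not call panicUnimplemented(), presumably so kernels
// that issue it can still run (the atomic add is dropped).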
32031 }
32032
32033 Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
32034 : Inst_DS(iFmt, "ds_read_b32")
32035 {
32036 setFlag(MemoryRef);
32037 setFlag(Load);
32038 } // Inst_DS__DS_READ_B32
32039
32040 Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
32041 {
32042 } // ~Inst_DS__DS_READ_B32
32043
32044 // RETURN_DATA = MEM[ADDR].
32045 // Dword read.
32046 void
32047 Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
32048 {
32049 Wavefront *wf = gpuDynInst->wavefront();
32050 gpuDynInst->execUnitId = wf->execUnitId;
32051 gpuDynInst->exec_mask = wf->execMask();
32052 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32053 gpuDynInst->latency.set(
32054 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32055 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32056
32057 addr.read();
32058
32059 calcAddr(gpuDynInst, addr);
32060
32061 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32062
32063 wf->rdLmReqsInPipe--;
32064 wf->outstandingReqsRdLm++;
32065 wf->outstandingReqs++;
32066 wf->validateRequestCounters();
32067 }
32068
32069 void
32070 Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
32071 {
32072 Addr offset0 = instData.OFFSET0;
32073 Addr offset1 = instData.OFFSET1;
32074 Addr offset = (offset1 << 8) | offset0;
32075
32076 initMemRead<VecElemU32>(gpuDynInst, offset);
32077 } // initiateAcc
32078
32079 void
32080 Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
32081 {
32082 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32083
32084 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32085 if (gpuDynInst->exec_mask[lane]) {
32086 vdst[lane] = (reinterpret_cast<VecElemU32*>(
32087 gpuDynInst->d_data))[lane];
32088 }
32089 }
32090
32091 vdst.write();
32092 } // completeAcc
32093
32094 Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt)
32095 : Inst_DS(iFmt, "ds_read2_b32")
32096 {
32097 setFlag(MemoryRef);
32098 setFlag(Load);
32099 } // Inst_DS__DS_READ2_B32
32100
32101 Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32()
32102 {
32103 } // ~Inst_DS__DS_READ2_B32
32104
32105 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
32106 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
32107 // Read 2 dwords.
32108 void
32109 Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst)
32110 {
32111 Wavefront *wf = gpuDynInst->wavefront();
32112 gpuDynInst->execUnitId = wf->execUnitId;
32113 gpuDynInst->exec_mask = wf->execMask();
32114 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32115 gpuDynInst->latency.set(
32116 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32117 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32118
32119 addr.read();
32120
32121 calcAddr(gpuDynInst, addr);
32122
32123 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32124
32125 wf->rdLmReqsInPipe--;
32126 wf->outstandingReqsRdLm++;
32127 wf->outstandingReqs++;
32128 wf->validateRequestCounters();
32129 }
32130
32131 void
32132 Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
32133 {
32134 Addr offset0 = instData.OFFSET0 * 4;
32135 Addr offset1 = instData.OFFSET1 * 4;
32136
32137 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
32138 } // initiateAcc
32139
32140 void
32141 Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
32142 {
32143 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
32144 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
32145
32146 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32147 if (gpuDynInst->exec_mask[lane]) {
32148 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
32149 gpuDynInst->d_data))[lane * 2];
32150 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
32151 gpuDynInst->d_data))[lane * 2 + 1];
32152 }
32153 }
32154
32155 vdst0.write();
32156 vdst1.write();
32157 } // completeAcc
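
    /**
     * Illustrative sketch, not part of the simulator: dual reads return
     * their data in d_data with the two dwords of each lane interleaved,
     * so lane N's results sit at indices 2N and 2N + 1, as unpacked in
     * completeAcc() above. The helper name is hypothetical.
     */
    static inline void
    unpackDualReadSketch(const VecElemU32 *d_data, int lane,
                         VecElemU32 &first, VecElemU32 &second)
    {
        first = d_data[lane * 2];      // value read at ADDR + OFFSET0 * 4
        second = d_data[lane * 2 + 1]; // value read at ADDR + OFFSET1 * 4
    }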
32158
32159 Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt)
32160 : Inst_DS(iFmt, "ds_read2st64_b32")
32161 {
32162 setFlag(MemoryRef);
32163 setFlag(Load);
32164 } // Inst_DS__DS_READ2ST64_B32
32165
32166 Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32()
32167 {
32168 } // ~Inst_DS__DS_READ2ST64_B32
32169
32170 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
32171 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
32172 // Read 2 dwords.
32173 void
32174 Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
32175 {
32176 Wavefront *wf = gpuDynInst->wavefront();
32177 gpuDynInst->execUnitId = wf->execUnitId;
32178 gpuDynInst->exec_mask = wf->execMask();
32179 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32180 gpuDynInst->latency.set(
32181 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32182 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32183
32184 addr.read();
32185
32186 calcAddr(gpuDynInst, addr);
32187
32188 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32189
32190 wf->rdLmReqsInPipe--;
32191 wf->outstandingReqsRdLm++;
32192 wf->outstandingReqs++;
32193 wf->validateRequestCounters();
32194 } // execute
32195
32196 void
32197 Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
32198 {
32199 Addr offset0 = (instData.OFFSET0 * 4 * 64);
32200 Addr offset1 = (instData.OFFSET1 * 4 * 64);
32201
32202 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
32203 }
32204
32205 void
32206 Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
32207 {
32208 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
32209 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
32210
32211 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32212 if (gpuDynInst->exec_mask[lane]) {
32213                 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
32214                     gpuDynInst->d_data))[lane * 2];
32215                 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
32216                     gpuDynInst->d_data))[lane * 2 + 1];
32217 }
32218 }
32219
32220 vdst0.write();
32221 vdst1.write();
32222 }
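
    /**
     * Illustrative sketch, not part of the simulator: the ST64 variants
     * scale each offset field by 64 elements rather than one, so for a
     * dword access OFFSET0 = 1 addresses ADDR_BASE + 256 bytes. The
     * helper name is hypothetical.
     */
    static inline Addr
    st64DwordByteOffsetSketch(Addr offsetField)
    {
        return offsetField * 4 * 64; // dword size * 64-element stride
    }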
32223 // --- Inst_DS__DS_READ_I8 class methods ---
32224
32225 Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt)
32226 : Inst_DS(iFmt, "ds_read_i8")
32227 {
32228 setFlag(MemoryRef);
32229 setFlag(Load);
32230 } // Inst_DS__DS_READ_I8
32231
32232 Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8()
32233 {
32234 } // ~Inst_DS__DS_READ_I8
32235
32236 // RETURN_DATA = signext(MEM[ADDR][7:0]).
32237 // Signed byte read.
32238 void
32239 Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst)
32240 {
32241 panicUnimplemented();
32242 }
32243
32244 Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt)
32245 : Inst_DS(iFmt, "ds_read_u8")
32246 {
32247 setFlag(MemoryRef);
32248 setFlag(Load);
32249 } // Inst_DS__DS_READ_U8
32250
32251 Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8()
32252 {
32253 } // ~Inst_DS__DS_READ_U8
32254
32255 // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
32256 // Unsigned byte read.
32257 void
32258 Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst)
32259 {
32260 Wavefront *wf = gpuDynInst->wavefront();
32261 gpuDynInst->execUnitId = wf->execUnitId;
32262 gpuDynInst->exec_mask = wf->execMask();
32263 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32264 gpuDynInst->latency.set(
32265 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32266 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32267
32268 addr.read();
32269
32270 calcAddr(gpuDynInst, addr);
32271
32272 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32273
32274 wf->rdLmReqsInPipe--;
32275 wf->outstandingReqsRdLm++;
32276 wf->outstandingReqs++;
32277 wf->validateRequestCounters();
32278 } // execute
32279
32280 void
32281 Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst)
32282 {
32283 Addr offset0 = instData.OFFSET0;
32284 Addr offset1 = instData.OFFSET1;
32285 Addr offset = (offset1 << 8) | offset0;
32286
32287 initMemRead<VecElemU8>(gpuDynInst, offset);
32288 } // initiateAcc
32289
32290 void
32291 Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst)
32292 {
32293 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32294
32295 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32296 if (gpuDynInst->exec_mask[lane]) {
32297 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
32298 gpuDynInst->d_data))[lane];
32299 }
32300 }
32301
32302 vdst.write();
32303 } // completeAcc
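
    /**
     * Illustrative sketch, not part of the simulator: sub-dword reads
     * like ds_read_u8 zero-extend into the 32-bit destination register,
     * matching the {24'h0,MEM[ADDR][7:0]} form in the comment above. The
     * helper name is hypothetical.
     */
    static inline VecElemU32
    zeroExtendByteSketch(VecElemU8 byte)
    {
        return (VecElemU32)byte; // high 24 bits become zero
    }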
32304 // --- Inst_DS__DS_READ_I16 class methods ---
32305
32306 Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt)
32307 : Inst_DS(iFmt, "ds_read_i16")
32308 {
32309 setFlag(MemoryRef);
32310 setFlag(Load);
32311 } // Inst_DS__DS_READ_I16
32312
32313 Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16()
32314 {
32315 } // ~Inst_DS__DS_READ_I16
32316
32317 // RETURN_DATA = signext(MEM[ADDR][15:0]).
32318 // Signed short read.
32319 void
32320 Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst)
32321 {
32322 panicUnimplemented();
32323 }
32324
32325 Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt)
32326 : Inst_DS(iFmt, "ds_read_u16")
32327 {
32328 setFlag(MemoryRef);
32329 setFlag(Load);
32330 } // Inst_DS__DS_READ_U16
32331
32332 Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16()
32333 {
32334 } // ~Inst_DS__DS_READ_U16
32335
32336 // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
32337 // Unsigned short read.
32338 void
32339 Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst)
32340 {
32341 Wavefront *wf = gpuDynInst->wavefront();
32342 gpuDynInst->execUnitId = wf->execUnitId;
32343 gpuDynInst->exec_mask = wf->execMask();
32344 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32345 gpuDynInst->latency.set(
32346 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32347 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32348
32349 addr.read();
32350
32351 calcAddr(gpuDynInst, addr);
32352
32353 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32354
32355 wf->rdLmReqsInPipe--;
32356 wf->outstandingReqsRdLm++;
32357 wf->outstandingReqs++;
32358 wf->validateRequestCounters();
32359         } // execute

32360         void
32361 Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst)
32362 {
32363 Addr offset0 = instData.OFFSET0;
32364 Addr offset1 = instData.OFFSET1;
32365 Addr offset = (offset1 << 8) | offset0;
32366
32367 initMemRead<VecElemU16>(gpuDynInst, offset);
32368 } // initiateAcc
32369
32370 void
32371 Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst)
32372 {
32373 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32374
32375 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32376 if (gpuDynInst->exec_mask[lane]) {
32377 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
32378 gpuDynInst->d_data))[lane];
32379 }
32380 }
32381
32382 vdst.write();
32383 } // completeAcc
32384 // --- Inst_DS__DS_SWIZZLE_B32 class methods ---
32385
32386 Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
32387 : Inst_DS(iFmt, "ds_swizzle_b32")
32388 {
32389 setFlag(Load);
32390 } // Inst_DS__DS_SWIZZLE_B32
32391
32392 Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
32393 {
32394 } // ~Inst_DS__DS_SWIZZLE_B32
32395
32396 // RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
32397         // Dword swizzle; no data is written to LDS memory.
32398 void
32399 Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
32400 {
32401 Wavefront *wf = gpuDynInst->wavefront();
32402 wf->rdLmReqsInPipe--;
32403 wf->validateRequestCounters();
32404
32405 if (gpuDynInst->exec_mask.none()) {
32406 return;
32407 }
32408
32409 gpuDynInst->execUnitId = wf->execUnitId;
32410 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32411 gpuDynInst->latency.set(gpuDynInst->computeUnit()
32412 ->cyclesToTicks(Cycles(24)));
32413
32414 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
32415 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32416 /**
32417              * The "DS pattern" comprises both offset fields and encodes the
32418              * swizzle pattern between lanes. Bit 15 of the DS pattern selects
32419              * the swizzle mode. There are two different swizzle modes:
32420              * 1) QDMode and 2) Bit-masks mode. If bit 15 is set, QDMode is
32421              * used; otherwise Bit-masks mode is used. The remaining bits
32422              * dictate how to swizzle the lanes.
32423 *
32424 * QDMode: Chunks the lanes into 4s and swizzles among them.
32425 * Bits 7:6 dictate where lane 3 (of the current chunk)
32426              * gets its data, bits 5:4 lane 2, and so on.
32427 *
32428 * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks.
32429 * 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
32430 * is the and_mask. Each lane is swizzled by performing
32431 * the appropriate operation using these masks.
32432 */
32433 VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);
32434
32435 data.read();
32436
32437 if (bits(ds_pattern, 15)) {
32438 // QDMode
32439 for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
32440 /**
32441 * This operation allows data sharing between groups
32442 * of four consecutive threads. Note the increment by
32443 * 4 in the for loop.
32444 */
32445 if (gpuDynInst->exec_mask[lane]) {
32446 int index0 = lane + bits(ds_pattern, 1, 0);
32447 panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
32448 "is out of bounds.\n", gpuDynInst->disassemble(),
32449 index0);
32450 vdst[lane]
32451 = gpuDynInst->exec_mask[index0] ? data[index0]: 0;
32452 }
32453 if (gpuDynInst->exec_mask[lane + 1]) {
32454 int index1 = lane + bits(ds_pattern, 3, 2);
32455 panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
32456 "is out of bounds.\n", gpuDynInst->disassemble(),
32457 index1);
32458 vdst[lane + 1]
32459 = gpuDynInst->exec_mask[index1] ? data[index1]: 0;
32460 }
32461 if (gpuDynInst->exec_mask[lane + 2]) {
32462 int index2 = lane + bits(ds_pattern, 5, 4);
32463 panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
32464 "is out of bounds.\n", gpuDynInst->disassemble(),
32465 index2);
32466 vdst[lane + 2]
32467 = gpuDynInst->exec_mask[index2] ? data[index2]: 0;
32468 }
32469 if (gpuDynInst->exec_mask[lane + 3]) {
32470 int index3 = lane + bits(ds_pattern, 7, 6);
32471 panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
32472 "is out of bounds.\n", gpuDynInst->disassemble(),
32473 index3);
32474 vdst[lane + 3]
32475 = gpuDynInst->exec_mask[index3] ? data[index3]: 0;
32476 }
32477 }
32478 } else {
32479 // Bit Mode
32480 int and_mask = bits(ds_pattern, 4, 0);
32481 int or_mask = bits(ds_pattern, 9, 5);
32482 int xor_mask = bits(ds_pattern, 14, 10);
32483 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32484 if (gpuDynInst->exec_mask[lane]) {
32485 int index = (((lane & and_mask) | or_mask) ^ xor_mask);
32486 // Adjust for the next 32 lanes.
32487 if (lane > 31) {
32488 index += 32;
32489 }
32490 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
32491 "out of bounds.\n", gpuDynInst->disassemble(),
32492 index);
32493 vdst[lane]
32494 = gpuDynInst->exec_mask[index] ? data[index] : 0;
32495 }
32496 }
32497 }
32498
32499 vdst.write();
32500 } // execute
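
    /**
     * Illustrative sketch, not part of the simulator: the source lane
     * that Bit-masks mode selects for a destination lane, using the
     * three 5-bit mask fields decoded in execute() above. The helper
     * name is hypothetical.
     */
    static inline int
    swizzleBitMaskLaneSketch(int lane, int and_mask, int or_mask,
                             int xor_mask)
    {
        int index = ((lane & and_mask) | or_mask) ^ xor_mask;
        // The 5-bit masks permute within a group of 32 lanes, so the
        // upper half of a 64-lane wavefront stays in the upper half.
        return (lane > 31) ? index + 32 : index;
    }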
32501 // --- Inst_DS__DS_PERMUTE_B32 class methods ---
32502
32503 Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
32504 : Inst_DS(iFmt, "ds_permute_b32")
32505 {
32506 setFlag(MemoryRef);
32507 /**
32508 * While this operation doesn't actually use DS storage we classify
32509 * it as a load here because it does a writeback to a VGPR, which
32510 * fits in better with the LDS pipeline logic.
32511 */
32512 setFlag(Load);
32513 } // Inst_DS__DS_PERMUTE_B32
32514
32515 Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
32516 {
32517 } // ~Inst_DS__DS_PERMUTE_B32
32518
32519 // Forward permute.
32520 void
32521 Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
32522 {
32523 Wavefront *wf = gpuDynInst->wavefront();
32524 gpuDynInst->execUnitId = wf->execUnitId;
32525 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32526 gpuDynInst->latency.set(gpuDynInst->computeUnit()
32527 ->cyclesToTicks(Cycles(24)));
32528 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32529 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
32530 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32531
32532 addr.read();
32533 data.read();
32534
32535 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32536 if (wf->execMask(lane)) {
32537 /**
32538 * One of the offset fields can be used for the index.
32539 * It is assumed OFFSET0 would be used, as OFFSET1 is
32540 * typically only used for DS ops that operate on two
32541 * disparate pieces of data.
32542 */
32543 assert(!instData.OFFSET1);
32544 /**
32545 * The address provided is a byte address, but VGPRs are
32546 * 4 bytes, so we must divide by 4 to get the actual VGPR
32547 * index. Additionally, the index is calculated modulo the
32548 * WF size, 64 in this case, so we simply extract bits 7-2.
32549 */
32550 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
32551 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
32552 "of bounds.\n", gpuDynInst->disassemble(), index);
32553 /**
32554 * If the shuffled index corresponds to a lane that is
32555 * inactive then this instruction writes a 0 to the active
32556 * lane in VDST.
32557 */
32558 if (wf->execMask(index)) {
32559 vdst[index] = data[lane];
32560 } else {
32561 vdst[index] = 0;
32562 }
32563 }
32564 }
32565
32566 vdst.write();
32567
32568 wf->decLGKMInstsIssued();
32569 wf->rdLmReqsInPipe--;
32570 wf->validateRequestCounters();
32571 } // execute
32572 // --- Inst_DS__DS_BPERMUTE_B32 class methods ---
32573
32574 Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
32575 : Inst_DS(iFmt, "ds_bpermute_b32")
32576 {
32577 setFlag(MemoryRef);
32578 /**
32579 * While this operation doesn't actually use DS storage we classify
32580 * it as a load here because it does a writeback to a VGPR, which
32581 * fits in better with the LDS pipeline logic.
32582 */
32583 setFlag(Load);
32584 } // Inst_DS__DS_BPERMUTE_B32
32585
32586 Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
32587 {
32588 } // ~Inst_DS__DS_BPERMUTE_B32
32589
32590 // Backward permute.
32591 void
32592 Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
32593 {
32594 Wavefront *wf = gpuDynInst->wavefront();
32595 gpuDynInst->execUnitId = wf->execUnitId;
32596 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32597 gpuDynInst->latency.set(gpuDynInst->computeUnit()
32598 ->cyclesToTicks(Cycles(24)));
32599 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32600 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
32601 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32602
32603 addr.read();
32604 data.read();
32605
32606 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32607 if (wf->execMask(lane)) {
32608 /**
32609 * One of the offset fields can be used for the index.
32610 * It is assumed OFFSET0 would be used, as OFFSET1 is
32611 * typically only used for DS ops that operate on two
32612 * disparate pieces of data.
32613 */
32614 assert(!instData.OFFSET1);
32615 /**
32616 * The address provided is a byte address, but VGPRs are
32617 * 4 bytes, so we must divide by 4 to get the actual VGPR
32618 * index. Additionally, the index is calculated modulo the
32619 * WF size, 64 in this case, so we simply extract bits 7-2.
32620 */
32621 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
32622 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
32623 "of bounds.\n", gpuDynInst->disassemble(), index);
32624 /**
32625 * If the shuffled index corresponds to a lane that is
32626 * inactive then this instruction writes a 0 to the active
32627 * lane in VDST.
32628 */
32629 if (wf->execMask(index)) {
32630 vdst[lane] = data[index];
32631 } else {
32632 vdst[lane] = 0;
32633 }
32634 }
32635 }
32636
32637 vdst.write();
32638
32639 wf->decLGKMInstsIssued();
32640 wf->rdLmReqsInPipe--;
32641 wf->validateRequestCounters();
32642 } // execute
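
    /**
     * Illustrative sketch, not part of the simulator: the direction of
     * the two permutes above. ds_permute_b32 scatters (the computed
     * index says where this lane's data goes), while ds_bpermute_b32
     * gathers (the index says where this lane's data comes from). The
     * helper names are hypothetical.
     */
    static inline void
    forwardPermuteSketch(const VecElemU32 *data, VecElemU32 *vdst,
                         int lane, int index)
    {
        vdst[index] = data[lane]; // scatter: write to the indexed lane
    }

    static inline void
    backwardPermuteSketch(const VecElemU32 *data, VecElemU32 *vdst,
                          int lane, int index)
    {
        vdst[lane] = data[index]; // gather: read from the indexed lane
    }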
32643
32644 // --- Inst_DS__DS_ADD_U64 class methods ---
32645
32646 Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
32647 : Inst_DS(iFmt, "ds_add_u64")
32648 {
32649 } // Inst_DS__DS_ADD_U64
32650
32651 Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
32652 {
32653 } // ~Inst_DS__DS_ADD_U64
32654
32655 // tmp = MEM[ADDR];
32656 // MEM[ADDR] += DATA[0:1];
32657 // RETURN_DATA[0:1] = tmp.
32658 void
32659 Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
32660 {
32661 panicUnimplemented();
32662 }
32663
32664 Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
32665 : Inst_DS(iFmt, "ds_sub_u64")
32666 {
32667 } // Inst_DS__DS_SUB_U64
32668
32669 Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64()
32670 {
32671 } // ~Inst_DS__DS_SUB_U64
32672
32673 // tmp = MEM[ADDR];
32674 // MEM[ADDR] -= DATA[0:1];
32675 // RETURN_DATA[0:1] = tmp.
32676 void
32677 Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst)
32678 {
32679 panicUnimplemented();
32680 }
32681
32682 Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt)
32683 : Inst_DS(iFmt, "ds_rsub_u64")
32684 {
32685 } // Inst_DS__DS_RSUB_U64
32686
32687 Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64()
32688 {
32689 } // ~Inst_DS__DS_RSUB_U64
32690
32691 // tmp = MEM[ADDR];
32692 // MEM[ADDR] = DATA - MEM[ADDR];
32693 // RETURN_DATA = tmp.
32694 // Subtraction with reversed operands.
32695 void
32696 Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst)
32697 {
32698 panicUnimplemented();
32699 }
32700
32701 Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt)
32702 : Inst_DS(iFmt, "ds_inc_u64")
32703 {
32704 } // Inst_DS__DS_INC_U64
32705
32706 Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64()
32707 {
32708 } // ~Inst_DS__DS_INC_U64
32709
32710 // tmp = MEM[ADDR];
32711 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
32712 // RETURN_DATA[0:1] = tmp.
32713 void
32714 Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst)
32715 {
32716 panicUnimplemented();
32717 }
32718
32719 Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt)
32720 : Inst_DS(iFmt, "ds_dec_u64")
32721 {
32722 } // Inst_DS__DS_DEC_U64
32723
32724 Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64()
32725 {
32726 } // ~Inst_DS__DS_DEC_U64
32727
32728 // tmp = MEM[ADDR];
32729 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
32730 // (unsigned compare);
32731 // RETURN_DATA[0:1] = tmp.
32732 void
32733 Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst)
32734 {
32735 panicUnimplemented();
32736 }
32737
32738 Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt)
32739 : Inst_DS(iFmt, "ds_min_i64")
32740 {
32741 } // Inst_DS__DS_MIN_I64
32742
32743 Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64()
32744 {
32745 } // ~Inst_DS__DS_MIN_I64
32746
32747 // tmp = MEM[ADDR];
32748         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
32749 // RETURN_DATA[0:1] = tmp.
32750 void
32751 Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst)
32752 {
32753 panicUnimplemented();
32754 }
32755
32756 Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt)
32757 : Inst_DS(iFmt, "ds_max_i64")
32758 {
32759 } // Inst_DS__DS_MAX_I64
32760
32761 Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64()
32762 {
32763 } // ~Inst_DS__DS_MAX_I64
32764
32765 // tmp = MEM[ADDR];
32766         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
32767 // RETURN_DATA[0:1] = tmp.
32768 void
32769 Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst)
32770 {
32771 panicUnimplemented();
32772 }
32773
32774 Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt)
32775 : Inst_DS(iFmt, "ds_min_u64")
32776 {
32777 } // Inst_DS__DS_MIN_U64
32778
32779 Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64()
32780 {
32781 } // ~Inst_DS__DS_MIN_U64
32782
32783 // tmp = MEM[ADDR];
32784         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
32785 // RETURN_DATA[0:1] = tmp.
32786 void
32787 Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst)
32788 {
32789 panicUnimplemented();
32790 }
32791
32792 Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt)
32793 : Inst_DS(iFmt, "ds_max_u64")
32794 {
32795 } // Inst_DS__DS_MAX_U64
32796
32797 Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64()
32798 {
32799 } // ~Inst_DS__DS_MAX_U64
32800
32801 // tmp = MEM[ADDR];
32802         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
32803 // RETURN_DATA[0:1] = tmp.
32804 void
32805 Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst)
32806 {
32807 panicUnimplemented();
32808 }
32809
32810 Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt)
32811 : Inst_DS(iFmt, "ds_and_b64")
32812 {
32813 } // Inst_DS__DS_AND_B64
32814
32815 Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64()
32816 {
32817 } // ~Inst_DS__DS_AND_B64
32818
32819 // tmp = MEM[ADDR];
32820 // MEM[ADDR] &= DATA[0:1];
32821 // RETURN_DATA[0:1] = tmp.
32822 void
32823 Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst)
32824 {
32825 panicUnimplemented();
32826 }
32827
32828 Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt)
32829 : Inst_DS(iFmt, "ds_or_b64")
32830 {
32831 } // Inst_DS__DS_OR_B64
32832
32833 Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64()
32834 {
32835 } // ~Inst_DS__DS_OR_B64
32836
32837 // tmp = MEM[ADDR];
32838 // MEM[ADDR] |= DATA[0:1];
32839 // RETURN_DATA[0:1] = tmp.
32840 void
32841 Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst)
32842 {
32843 panicUnimplemented();
32844 }
32845
32846 Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt)
32847 : Inst_DS(iFmt, "ds_xor_b64")
32848 {
32849 } // Inst_DS__DS_XOR_B64
32850
32851 Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64()
32852 {
32853 } // ~Inst_DS__DS_XOR_B64
32854
32855 // tmp = MEM[ADDR];
32856 // MEM[ADDR] ^= DATA[0:1];
32857 // RETURN_DATA[0:1] = tmp.
32858 void
32859 Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
32860 {
32861 panicUnimplemented();
32862 }
32863
32864 Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt)
32865 : Inst_DS(iFmt, "ds_mskor_b64")
32866 {
32867 } // Inst_DS__DS_MSKOR_B64
32868
32869 Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64()
32870 {
32871 } // ~Inst_DS__DS_MSKOR_B64
32872
32873 // tmp = MEM[ADDR];
32874         // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
32875 // RETURN_DATA = tmp.
32876 void
32877 Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst)
32878 {
32879 panicUnimplemented();
32880 }
32881
32882 Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt)
32883 : Inst_DS(iFmt, "ds_write_b64")
32884 {
32885 setFlag(MemoryRef);
32886 setFlag(Store);
32887 } // Inst_DS__DS_WRITE_B64
32888
32889 Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64()
32890 {
32891 } // ~Inst_DS__DS_WRITE_B64
32892
32893 // MEM[ADDR] = DATA.
32894 // Write qword.
32895 void
32896 Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst)
32897 {
32898 Wavefront *wf = gpuDynInst->wavefront();
32899 gpuDynInst->execUnitId = wf->execUnitId;
32900 gpuDynInst->exec_mask = wf->execMask();
32901 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32902 gpuDynInst->latency.set(
32903 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32904 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32905 ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
32906
32907 addr.read();
32908 data.read();
32909
32910 calcAddr(gpuDynInst, addr);
32911
32912 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32913 if (wf->execMask(lane)) {
32914 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
32915 = data[lane];
32916 }
32917 }
32918
32919 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32920
32921 wf->wrLmReqsInPipe--;
32922 wf->outstandingReqsWrLm++;
32923 wf->outstandingReqs++;
32924 wf->validateRequestCounters();
32925 }
32926
32927 void
32928 Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
32929 {
32930 Addr offset0 = instData.OFFSET0;
32931 Addr offset1 = instData.OFFSET1;
32932 Addr offset = (offset1 << 8) | offset0;
32933
32934 initMemWrite<VecElemU64>(gpuDynInst, offset);
32935 } // initiateAcc
32936
32937 void
32938 Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst)
32939 {
32940 } // completeAcc
32941
32942 Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt)
32943 : Inst_DS(iFmt, "ds_write2_b64")
32944 {
32945 setFlag(MemoryRef);
32946 setFlag(Store);
32947 } // Inst_DS__DS_WRITE2_B64
32948
32949 Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64()
32950 {
32951 } // ~Inst_DS__DS_WRITE2_B64
32952
32953 // MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
32954 // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
32955 // Write 2 qwords.
32956 void
32957 Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst)
32958 {
32959 Wavefront *wf = gpuDynInst->wavefront();
32960 gpuDynInst->execUnitId = wf->execUnitId;
32961 gpuDynInst->exec_mask = wf->execMask();
32962 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32963 gpuDynInst->latency.set(
32964 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32965 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32966 ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
32967 ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
32968
32969 addr.read();
32970 data0.read();
32971 data1.read();
32972
32973 calcAddr(gpuDynInst, addr);
32974
32975 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32976 if (wf->execMask(lane)) {
32977 (reinterpret_cast<VecElemU64*>(
32978 gpuDynInst->d_data))[lane * 2] = data0[lane];
32979 (reinterpret_cast<VecElemU64*>(
32980 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
32981 }
32982 }
32983
32984 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32985
32986 wf->wrLmReqsInPipe--;
32987 wf->outstandingReqsWrLm++;
32988 wf->outstandingReqs++;
32989 wf->validateRequestCounters();
32990 }
32991
32992 void
32993 Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
32994 {
32995 Addr offset0 = instData.OFFSET0 * 8;
32996 Addr offset1 = instData.OFFSET1 * 8;
32997
32998 initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
32999 }
33000
33001 void
33002 Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33003 {
33004 }
33005
33006 Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt)
33007 : Inst_DS(iFmt, "ds_write2st64_b64")
33008 {
33009 setFlag(MemoryRef);
33010 setFlag(Store);
33011 } // Inst_DS__DS_WRITE2ST64_B64
33012
33013 Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64()
33014 {
33015 } // ~Inst_DS__DS_WRITE2ST64_B64
33016
33017 // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
33018         // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2.
33019 // Write 2 qwords.
33020 void
33021 Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
33022 {
33023 panicUnimplemented();
33024 }
33025
33026 Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt)
33027 : Inst_DS(iFmt, "ds_cmpst_b64")
33028 {
33029 } // Inst_DS__DS_CMPST_B64
33030
33031 Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64()
33032 {
33033 } // ~Inst_DS__DS_CMPST_B64
33034
33035 // tmp = MEM[ADDR];
33036 // src = DATA2;
33037 // cmp = DATA;
33038 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33039 // RETURN_DATA[0] = tmp.
33040 // Compare and store.
33041 void
33042 Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst)
33043 {
33044 panicUnimplemented();
33045 }
33046
33047 Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt)
33048 : Inst_DS(iFmt, "ds_cmpst_f64")
33049 {
33050 setFlag(F64);
33051 } // Inst_DS__DS_CMPST_F64
33052
33053 Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64()
33054 {
33055 } // ~Inst_DS__DS_CMPST_F64
33056
33057 // tmp = MEM[ADDR];
33058 // src = DATA2;
33059 // cmp = DATA;
33060 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33061 // RETURN_DATA[0] = tmp.
33062 void
33063 Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst)
33064 {
33065 panicUnimplemented();
33066 }
33067
33068 Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt)
33069 : Inst_DS(iFmt, "ds_min_f64")
33070 {
33071 setFlag(F64);
33072 } // Inst_DS__DS_MIN_F64
33073
33074 Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64()
33075 {
33076 } // ~Inst_DS__DS_MIN_F64
33077
33078 // tmp = MEM[ADDR];
33079 // src = DATA;
33080 // cmp = DATA2;
33081 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
33082 void
33083 Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
33084 {
33085 panicUnimplemented();
33086 }
33087
33088 Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt)
33089 : Inst_DS(iFmt, "ds_max_f64")
33090 {
33091 setFlag(F64);
33092 } // Inst_DS__DS_MAX_F64
33093
33094 Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64()
33095 {
33096 } // ~Inst_DS__DS_MAX_F64
33097
33098 // tmp = MEM[ADDR];
33099 // src = DATA;
33100 // cmp = DATA2;
33101 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
33102 void
33103 Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
33104 {
33105 panicUnimplemented();
33106 }
33107
33108 Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt)
33109 : Inst_DS(iFmt, "ds_add_rtn_u64")
33110 {
33111 } // Inst_DS__DS_ADD_RTN_U64
33112
33113 Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64()
33114 {
33115 } // ~Inst_DS__DS_ADD_RTN_U64
33116
33117 // tmp = MEM[ADDR];
33118 // MEM[ADDR] += DATA[0:1];
33119 // RETURN_DATA[0:1] = tmp.
33120 void
33121 Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33122 {
33123 panicUnimplemented();
33124 }
33125
33126 Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt)
33127 : Inst_DS(iFmt, "ds_sub_rtn_u64")
33128 {
33129 } // Inst_DS__DS_SUB_RTN_U64
33130
33131 Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64()
33132 {
33133 } // ~Inst_DS__DS_SUB_RTN_U64
33134
33135 // tmp = MEM[ADDR];
33136 // MEM[ADDR] -= DATA[0:1];
33137 // RETURN_DATA[0:1] = tmp.
33138 void
33139 Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33140 {
33141 panicUnimplemented();
33142 }
33143
33144 Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt)
33145 : Inst_DS(iFmt, "ds_rsub_rtn_u64")
33146 {
33147 } // Inst_DS__DS_RSUB_RTN_U64
33148
33149 Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64()
33150 {
33151 } // ~Inst_DS__DS_RSUB_RTN_U64
33152
33153 // tmp = MEM[ADDR];
33154 // MEM[ADDR] = DATA - MEM[ADDR];
33155 // RETURN_DATA = tmp.
33156 // Subtraction with reversed operands.
33157 void
33158 Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33159 {
33160 panicUnimplemented();
33161 }
33162
33163 Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt)
33164 : Inst_DS(iFmt, "ds_inc_rtn_u64")
33165 {
33166 } // Inst_DS__DS_INC_RTN_U64
33167
33168 Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64()
33169 {
33170 } // ~Inst_DS__DS_INC_RTN_U64
33171
33172 // tmp = MEM[ADDR];
33173 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
33174 // RETURN_DATA[0:1] = tmp.
33175 void
33176 Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33177 {
33178 panicUnimplemented();
33179 }
33180
33181 Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt)
33182 : Inst_DS(iFmt, "ds_dec_rtn_u64")
33183 {
33184 } // Inst_DS__DS_DEC_RTN_U64
33185
33186 Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64()
33187 {
33188 } // ~Inst_DS__DS_DEC_RTN_U64
33189
33190 // tmp = MEM[ADDR];
33191 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
33192 // (unsigned compare);
33193 // RETURN_DATA[0:1] = tmp.
33194 void
33195 Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33196 {
33197 panicUnimplemented();
33198 }
33199
33200 Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt)
33201 : Inst_DS(iFmt, "ds_min_rtn_i64")
33202 {
33203 } // Inst_DS__DS_MIN_RTN_I64
33204
33205 Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64()
33206 {
33207 } // ~Inst_DS__DS_MIN_RTN_I64
33208
33209 // tmp = MEM[ADDR];
33210         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
33211 // RETURN_DATA[0:1] = tmp.
33212 void
33213 Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
33214 {
33215 panicUnimplemented();
33216 }
33217
33218 Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt)
33219 : Inst_DS(iFmt, "ds_max_rtn_i64")
33220 {
33221 } // Inst_DS__DS_MAX_RTN_I64
33222
33223 Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64()
33224 {
33225 } // ~Inst_DS__DS_MAX_RTN_I64
33226
33227 // tmp = MEM[ADDR];
33228         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
33229 // RETURN_DATA[0:1] = tmp.
33230 void
33231 Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
33232 {
33233 panicUnimplemented();
33234 }
33235
33236 Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt)
33237 : Inst_DS(iFmt, "ds_min_rtn_u64")
33238 {
33239 } // Inst_DS__DS_MIN_RTN_U64
33240
33241 Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64()
33242 {
33243 } // ~Inst_DS__DS_MIN_RTN_U64
33244
33245 // tmp = MEM[ADDR];
33246         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
33247 // RETURN_DATA[0:1] = tmp.
33248 void
33249 Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33250 {
33251 panicUnimplemented();
33252 }
33253
33254 Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt)
33255 : Inst_DS(iFmt, "ds_max_rtn_u64")
33256 {
33257 } // Inst_DS__DS_MAX_RTN_U64
33258
33259 Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64()
33260 {
33261 } // ~Inst_DS__DS_MAX_RTN_U64
33262
33263 // tmp = MEM[ADDR];
33264         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
33265 // RETURN_DATA[0:1] = tmp.
33266 void
33267 Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33268 {
33269 panicUnimplemented();
33270 }
33271
33272 Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt)
33273 : Inst_DS(iFmt, "ds_and_rtn_b64")
33274 {
33275 } // Inst_DS__DS_AND_RTN_B64
33276
33277 Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64()
33278 {
33279 } // ~Inst_DS__DS_AND_RTN_B64
33280
33281 // tmp = MEM[ADDR];
33282 // MEM[ADDR] &= DATA[0:1];
33283 // RETURN_DATA[0:1] = tmp.
33284 void
33285 Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33286 {
33287 panicUnimplemented();
33288 }
33289
33290 Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt)
33291 : Inst_DS(iFmt, "ds_or_rtn_b64")
33292 {
33293 } // Inst_DS__DS_OR_RTN_B64
33294
33295 Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64()
33296 {
33297 } // ~Inst_DS__DS_OR_RTN_B64
33298
33299 // tmp = MEM[ADDR];
33300 // MEM[ADDR] |= DATA[0:1];
33301 // RETURN_DATA[0:1] = tmp.
33302 void
33303 Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33304 {
33305 panicUnimplemented();
33306 }
33307
33308 Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt)
33309 : Inst_DS(iFmt, "ds_xor_rtn_b64")
33310 {
33311 } // Inst_DS__DS_XOR_RTN_B64
33312
33313 Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64()
33314 {
33315 } // ~Inst_DS__DS_XOR_RTN_B64
33316
33317 // tmp = MEM[ADDR];
33318 // MEM[ADDR] ^= DATA[0:1];
33319 // RETURN_DATA[0:1] = tmp.
33320 void
33321 Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33322 {
33323 panicUnimplemented();
33324 }
33325
33326 Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt)
33327 : Inst_DS(iFmt, "ds_mskor_rtn_b64")
33328 {
33329 } // Inst_DS__DS_MSKOR_RTN_B64
33330
33331 Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64()
33332 {
33333 } // ~Inst_DS__DS_MSKOR_RTN_B64
33334
33335 // tmp = MEM[ADDR];
33336         // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
33337         // RETURN_DATA = tmp.
33338         // Masked qword OR; D0 contains the mask and D1 contains the new value.
33339 void
33340 Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33341 {
33342 panicUnimplemented();
33343 }
33344
33345 Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt)
33346 : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
33347 {
33348 } // Inst_DS__DS_WRXCHG_RTN_B64
33349
33350 Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64()
33351 {
33352 } // ~Inst_DS__DS_WRXCHG_RTN_B64
33353
33354 // tmp = MEM[ADDR];
33355 // MEM[ADDR] = DATA;
33356 // RETURN_DATA = tmp.
33357 // Write-exchange operation.
33358 void
33359 Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33360 {
33361 panicUnimplemented();
33362 }
33363
33364 Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt)
33365 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
33366 {
33367 } // Inst_DS__DS_WRXCHG2_RTN_B64
33368
33369 Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64()
33370 {
33371 } // ~Inst_DS__DS_WRXCHG2_RTN_B64
33372
33373 // Write-exchange 2 separate qwords.
33374 void
33375 Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33376 {
33377 panicUnimplemented();
33378 }
33379
33380 Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
33381 InFmt_DS *iFmt)
33382 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
33383 {
33384 } // Inst_DS__DS_WRXCHG2ST64_RTN_B64
33385
33386 Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64()
33387 {
33388 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64
33389
33390 // Write-exchange 2 qwords with a stride of 64 qwords.
33391 void
33392 Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33393 {
33394 panicUnimplemented();
33395 }
33396
33397 Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt)
33398 : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
33399 {
33400 } // Inst_DS__DS_CMPST_RTN_B64
33401
33402 Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64()
33403 {
33404 } // ~Inst_DS__DS_CMPST_RTN_B64
33405
33406 // tmp = MEM[ADDR];
33407 // src = DATA2;
33408 // cmp = DATA;
33409 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33410 // RETURN_DATA[0] = tmp.
33411 // Compare and store.
33412 void
33413 Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33414 {
33415 panicUnimplemented();
33416 }
33417
33418 Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt)
33419 : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
33420 {
33421 setFlag(F64);
33422 } // Inst_DS__DS_CMPST_RTN_F64
33423
33424 Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64()
33425 {
33426 } // ~Inst_DS__DS_CMPST_RTN_F64
33427
33428 // tmp = MEM[ADDR];
33429 // src = DATA2;
33430 // cmp = DATA;
33431 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33432 // RETURN_DATA[0] = tmp.
33433 void
33434 Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
33435 {
33436 panicUnimplemented();
33437 }
33438
33439 Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt)
33440 : Inst_DS(iFmt, "ds_min_rtn_f64")
33441 {
33442 setFlag(F64);
33443 } // Inst_DS__DS_MIN_RTN_F64
33444
33445 Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64()
33446 {
33447 } // ~Inst_DS__DS_MIN_RTN_F64
33448
33449 // tmp = MEM[ADDR];
33450 // src = DATA;
33451 // cmp = DATA2;
33452 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
33453 void
33454 Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
33455 {
33456 panicUnimplemented();
33457 }
33458
33459 Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt)
33460 : Inst_DS(iFmt, "ds_max_rtn_f64")
33461 {
33462 setFlag(F64);
33463 } // Inst_DS__DS_MAX_RTN_F64
33464
33465 Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64()
33466 {
33467 } // ~Inst_DS__DS_MAX_RTN_F64
33468
33469 // tmp = MEM[ADDR];
33470 // src = DATA;
33471 // cmp = DATA2;
33472 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
33473 void
33474 Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
33475 {
33476 panicUnimplemented();
33477 }
33478
33479 Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt)
33480 : Inst_DS(iFmt, "ds_read_b64")
33481 {
33482 setFlag(MemoryRef);
33483 setFlag(Load);
33484 } // Inst_DS__DS_READ_B64
33485
33486 Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64()
33487 {
33488 } // ~Inst_DS__DS_READ_B64
33489
33490 // RETURN_DATA = MEM[ADDR].
33491 // Read 1 qword.
33492 void
33493 Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst)
33494 {
33495 Wavefront *wf = gpuDynInst->wavefront();
33496 gpuDynInst->execUnitId = wf->execUnitId;
33497 gpuDynInst->exec_mask = wf->execMask();
33498 gpuDynInst->latency.init(gpuDynInst->computeUnit());
33499 gpuDynInst->latency.set(
33500 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
33501 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
33502
33503 addr.read();
33504
33505 calcAddr(gpuDynInst, addr);
33506
33507 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
33508
33509 wf->rdLmReqsInPipe--;
33510 wf->outstandingReqsRdLm++;
33511 wf->outstandingReqs++;
33512 wf->validateRequestCounters();
33513 }
33514
33515 void
33516 Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
33517 {
33518 Addr offset0 = instData.OFFSET0;
33519 Addr offset1 = instData.OFFSET1;
33520 Addr offset = (offset1 << 8) | offset0;
33521
33522 initMemRead<VecElemU64>(gpuDynInst, offset);
33523 } // initiateAcc
33524
33525 void
33526 Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33527 {
33528 VecOperandU64 vdst(gpuDynInst, extData.VDST);
33529
33530 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
33531 if (gpuDynInst->exec_mask[lane]) {
33532 vdst[lane] = (reinterpret_cast<VecElemU64*>(
33533 gpuDynInst->d_data))[lane];
33534 }
33535 }
33536
33537 vdst.write();
33538 } // completeAcc
33539
33540 Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt)
33541 : Inst_DS(iFmt, "ds_read2_b64")
33542 {
33543 setFlag(MemoryRef);
33544 setFlag(Load);
33545 } // Inst_DS__DS_READ2_B64
33546
33547 Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64()
33548 {
33549 } // ~Inst_DS__DS_READ2_B64
33550
33551 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
33552 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
33553 // Read 2 qwords.
33554 void
33555 Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst)
33556 {
33557 Wavefront *wf = gpuDynInst->wavefront();
33558 gpuDynInst->execUnitId = wf->execUnitId;
33559 gpuDynInst->exec_mask = wf->execMask();
33560 gpuDynInst->latency.init(gpuDynInst->computeUnit());
33561 gpuDynInst->latency.set(
33562 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
33563 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
33564
33565 addr.read();
33566
33567 calcAddr(gpuDynInst, addr);
33568
33569 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
33570
33571 wf->rdLmReqsInPipe--;
33572 wf->outstandingReqsRdLm++;
33573 wf->outstandingReqs++;
33574 wf->validateRequestCounters();
33575 }
33576
33577 void
33578 Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
33579 {
33580 Addr offset0 = instData.OFFSET0 * 8;
33581 Addr offset1 = instData.OFFSET1 * 8;
33582
33583 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
33584 } // initiateAcc
33585
33586 void
33587 Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33588 {
33589 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
33590 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
33591
33592 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
33593 if (gpuDynInst->exec_mask[lane]) {
33594 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
33595 gpuDynInst->d_data))[lane * 2];
33596 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
33597 gpuDynInst->d_data))[lane * 2 + 1];
33598 }
33599 }
33600
33601 vdst0.write();
33602 vdst1.write();
33603 } // completeAcc
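
    /**
     * Illustrative note, not part of the simulator: each 64-bit result
     * occupies two consecutive dword VGPRs, which is why the second
     * qword above is written at VDST + 2 rather than VDST + 1 (contrast
     * ds_read2_b32, whose second dword goes to VDST + 1).
     */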
33604
33605 Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt)
33606 : Inst_DS(iFmt, "ds_read2st64_b64")
33607 {
33608 setFlag(MemoryRef);
33609 setFlag(Load);
33610 } // Inst_DS__DS_READ2ST64_B64
33611
33612 Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64()
33613 {
33614 } // ~Inst_DS__DS_READ2ST64_B64
33615
33616 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
33617 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
33618 // Read 2 qwords.
33619 void
33620 Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
33621 {
33622 Wavefront *wf = gpuDynInst->wavefront();
33623 gpuDynInst->execUnitId = wf->execUnitId;
33624 gpuDynInst->exec_mask = wf->execMask();
33625 gpuDynInst->latency.init(gpuDynInst->computeUnit());
33626 gpuDynInst->latency.set(
33627 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
33628 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
33629
33630 addr.read();
33631
33632 calcAddr(gpuDynInst, addr);
33633
33634 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
33635
33636 wf->rdLmReqsInPipe--;
33637 wf->outstandingReqsRdLm++;
33638 wf->outstandingReqs++;
33639 wf->validateRequestCounters();
33640 }
33641
33642 void
33643 Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
33644 {
33645 Addr offset0 = (instData.OFFSET0 * 8 * 64);
33646 Addr offset1 = (instData.OFFSET1 * 8 * 64);
33647
33648 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
33649 }
33650
33651 void
33652 Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33653 {
33654 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
33655 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
33656
33657 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
33658 if (gpuDynInst->exec_mask[lane]) {
33659 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
33660 gpuDynInst->d_data))[lane * 2];
33661 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
33662 gpuDynInst->d_data))[lane * 2 + 1];
33663 }
33664 }
33665
33666 vdst0.write();
33667 vdst1.write();
33668 }
33669
33670 Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64(
33671 InFmt_DS *iFmt)
33672 : Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
33673 {
33674 } // Inst_DS__DS_CONDXCHG32_RTN_B64
33675
33676 Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64()
33677 {
33678 } // ~Inst_DS__DS_CONDXCHG32_RTN_B64
33679
33680 // Conditional write exchange.
33681 void
33682 Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33683 {
33684 panicUnimplemented();
33685 }
33686
33687 Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt)
33688 : Inst_DS(iFmt, "ds_add_src2_u32")
33689 {
33690 } // Inst_DS__DS_ADD_SRC2_U32
33691
33692 Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32()
33693 {
33694 } // ~Inst_DS__DS_ADD_SRC2_U32
33695
33696 // A = ADDR_BASE;
33697 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33698 // {offset1[6],offset1[6:0],offset0});
33699 // MEM[A] = MEM[A] + MEM[B].
33700 void
33701 Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33702 {
33703 panicUnimplemented();
33704 }
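
    /**
     * Illustrative sketch, not part of the simulator: one way to decode
     * the Verilog-style concatenations in the SRC2 comments above into
     * the second address B. Whether the 16-bit result is sign-extended
     * is not specified here; the helper name is hypothetical.
     */
    static inline Addr
    src2SecondAddrSketch(Addr a, unsigned offset0, unsigned offset1)
    {
        Addr val;
        if (bits(offset1, 7)) {
            // {A[31],A[31:17]}: bit 31 of A followed by A[31:17]
            val = (bits(a, 31) << 15) | bits(a, 31, 17);
        } else {
            // {offset1[6],offset1[6:0],offset0}
            val = (bits(offset1, 6) << 15) |
                (bits(offset1, 6, 0) << 8) | offset0;
        }
        return a + 4 * val; // dword-scaled displacement
    }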
33705
33706 Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt)
33707 : Inst_DS(iFmt, "ds_sub_src2_u32")
33708 {
33709 } // Inst_DS__DS_SUB_SRC2_U32
33710
33711 Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32()
33712 {
33713 } // ~Inst_DS__DS_SUB_SRC2_U32
33714
33715 // A = ADDR_BASE;
33716 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33717 // {offset1[6],offset1[6:0],offset0});
33718 // MEM[A] = MEM[A] - MEM[B].
33719 void
33720 Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33721 {
33722 panicUnimplemented();
33723 }
33724
33725 Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt)
33726 : Inst_DS(iFmt, "ds_rsub_src2_u32")
33727 {
33728 } // Inst_DS__DS_RSUB_SRC2_U32
33729
33730 Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32()
33731 {
33732 } // ~Inst_DS__DS_RSUB_SRC2_U32
33733
33734 // A = ADDR_BASE;
33735 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33736 // {offset1[6],offset1[6:0],offset0});
33737 // MEM[A] = MEM[B] - MEM[A].
33738 void
33739 Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33740 {
33741 panicUnimplemented();
33742 }
33743
33744 Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt)
33745 : Inst_DS(iFmt, "ds_inc_src2_u32")
33746 {
33747 } // Inst_DS__DS_INC_SRC2_U32
33748
33749 Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32()
33750 {
33751 } // ~Inst_DS__DS_INC_SRC2_U32
33752
33753 // A = ADDR_BASE;
33754 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33755 // {offset1[6],offset1[6:0],offset0});
33756 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
33757 void
33758 Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33759 {
33760 panicUnimplemented();
33761 }
33762
33763 Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt)
33764 : Inst_DS(iFmt, "ds_dec_src2_u32")
33765 {
33766 } // Inst_DS__DS_DEC_SRC2_U32
33767
33768 Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32()
33769 {
33770 } // ~Inst_DS__DS_DEC_SRC2_U32
33771
33772 // A = ADDR_BASE;
33773 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33774 // {offset1[6],offset1[6:0],offset0});
33775 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
33776 // Uint decrement.
33777 void
33778 Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33779 {
33780 panicUnimplemented();
33781 }
33782
33783 Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt)
33784 : Inst_DS(iFmt, "ds_min_src2_i32")
33785 {
33786 } // Inst_DS__DS_MIN_SRC2_I32
33787
33788 Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32()
33789 {
33790 } // ~Inst_DS__DS_MIN_SRC2_I32
33791
33792 // A = ADDR_BASE;
33793 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33794 // {offset1[6],offset1[6:0],offset0});
33795 // MEM[A] = min(MEM[A], MEM[B]).
33796 void
33797 Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
33798 {
33799 panicUnimplemented();
33800 }
33801
33802 Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt)
33803 : Inst_DS(iFmt, "ds_max_src2_i32")
33804 {
33805 } // Inst_DS__DS_MAX_SRC2_I32
33806
33807 Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32()
33808 {
33809 } // ~Inst_DS__DS_MAX_SRC2_I32
33810
33811 // A = ADDR_BASE;
33812 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33813 // {offset1[6],offset1[6:0],offset0});
33814 // MEM[A] = max(MEM[A], MEM[B]).
33815 void
33816 Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
33817 {
33818 panicUnimplemented();
33819 }
33820
33821 Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt)
33822 : Inst_DS(iFmt, "ds_min_src2_u32")
33823 {
33824 } // Inst_DS__DS_MIN_SRC2_U32
33825
33826 Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32()
33827 {
33828 } // ~Inst_DS__DS_MIN_SRC2_U32
33829
33830 // A = ADDR_BASE;
33831 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33832 // {offset1[6],offset1[6:0],offset0});
33833 // MEM[A] = min(MEM[A], MEM[B]).
33834 void
33835 Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33836 {
33837 panicUnimplemented();
33838 }
33839
33840 Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt)
33841 : Inst_DS(iFmt, "ds_max_src2_u32")
33842 {
33843 } // Inst_DS__DS_MAX_SRC2_U32
33844
33845 Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32()
33846 {
33847 } // ~Inst_DS__DS_MAX_SRC2_U32
33848
33849 // A = ADDR_BASE;
33850 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33851 // {offset1[6],offset1[6:0],offset0});
33852 // MEM[A] = max(MEM[A], MEM[B]).
33853 void
33854 Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33855 {
33856 panicUnimplemented();
33857 }
33858
33859 Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
33860 : Inst_DS(iFmt, "ds_and_src2_b32")
33861 {
33862 } // Inst_DS__DS_AND_SRC2_B32
33863
33864 Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
33865 {
33866 } // ~Inst_DS__DS_AND_SRC2_B32
33867
33868 // A = ADDR_BASE;
33869 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33870 // {offset1[6],offset1[6:0],offset0});
33871 // MEM[A] = MEM[A] & MEM[B].
33872 void
33873 Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33874 {
33875 panicUnimplemented();
33876 }
33877
33878 Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt)
33879 : Inst_DS(iFmt, "ds_or_src2_b32")
33880 {
33881 } // Inst_DS__DS_OR_SRC2_B32
33882
33883 Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
33884 {
33885 } // ~Inst_DS__DS_OR_SRC2_B32
33886
33887 // A = ADDR_BASE;
33888 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33889 // {offset1[6],offset1[6:0],offset0});
33890 // MEM[A] = MEM[A] | MEM[B].
33891 void
33892 Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33893 {
33894 panicUnimplemented();
33895 }
33896
33897 Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt)
33898 : Inst_DS(iFmt, "ds_xor_src2_b32")
33899 {
33900 } // Inst_DS__DS_XOR_SRC2_B32
33901
33902 Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
33903 {
33904 } // ~Inst_DS__DS_XOR_SRC2_B32
33905
33906 // A = ADDR_BASE;
33907 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33908 // {offset1[6],offset1[6:0],offset0});
33909 // MEM[A] = MEM[A] ^ MEM[B].
33910 void
33911 Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33912 {
33913 panicUnimplemented();
33914 }
33915
33916 Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt)
33917 : Inst_DS(iFmt, "ds_write_src2_b32")
33918 {
33919 setFlag(MemoryRef);
33920 setFlag(Store);
33921 } // Inst_DS__DS_WRITE_SRC2_B32
33922
33923 Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
33924 {
33925 } // ~Inst_DS__DS_WRITE_SRC2_B32
33926
33927 // A = ADDR_BASE;
33928 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33929 // {offset1[6],offset1[6:0],offset0});
33930 // MEM[A] = MEM[B].
33931 // Write dword.
33932 void
33933 Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33934 {
33935 panicUnimplemented();
33936 }
33937
33938 Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
33939 : Inst_DS(iFmt, "ds_min_src2_f32")
33940 {
33941 setFlag(F32);
33942 } // Inst_DS__DS_MIN_SRC2_F32
33943
33944 Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
33945 {
33946 } // ~Inst_DS__DS_MIN_SRC2_F32
33947
33948 // A = ADDR_BASE;
33949 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33950 // {offset1[6],offset1[6:0],offset0});
33951 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
33952 void
33953 Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
33954 {
33955 panicUnimplemented();
33956 }
33957
33958 Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
33959 : Inst_DS(iFmt, "ds_max_src2_f32")
33960 {
33961 setFlag(F32);
33962 } // Inst_DS__DS_MAX_SRC2_F32
33963
33964 Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
33965 {
33966 } // ~Inst_DS__DS_MAX_SRC2_F32
33967
33968 // A = ADDR_BASE;
33969 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33970 // {offset1[6],offset1[6:0],offset0});
33971 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
33972 void
33973 Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
33974 {
33975 panicUnimplemented();
33976 }
33977
33978 Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
33979 : Inst_DS(iFmt, "ds_add_src2_f32")
33980 {
33981 setFlag(F32);
33982 } // Inst_DS__DS_ADD_SRC2_F32
33983
33984 Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
33985 {
33986 } // ~Inst_DS__DS_ADD_SRC2_F32
33987
33988 // A = ADDR_BASE;
33989 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33990 // {offset1[6],offset1[6:0],offset0});
33991 // MEM[A] = MEM[B] + MEM[A].
33992 void
33993 Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
33994 {
33995 panicUnimplemented();
33996 }
33997
33998 Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
33999 InFmt_DS *iFmt)
34000 : Inst_DS(iFmt, "ds_gws_sema_release_all")
34001 {
34002 } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL
34003
34004 Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
34005 {
34006 } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL
34007
34008 void
34009 Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
34010 {
34011 panicUnimplemented();
34012 }
34013
34014 Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
34015 : Inst_DS(iFmt, "ds_gws_init")
34016 {
34017 } // Inst_DS__DS_GWS_INIT
34018
34019 Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
34020 {
34021 } // ~Inst_DS__DS_GWS_INIT
34022
34023 void
34024 Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
34025 {
34026 panicUnimplemented();
34027 }
34028
34029 Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
34030 : Inst_DS(iFmt, "ds_gws_sema_v")
34031 {
34032 } // Inst_DS__DS_GWS_SEMA_V
34033
34034 Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
34035 {
34036 } // ~Inst_DS__DS_GWS_SEMA_V
34037
34038 void
34039 Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
34040 {
34041 panicUnimplemented();
34042 }
34043
34044 Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
34045 : Inst_DS(iFmt, "ds_gws_sema_br")
34046 {
34047 } // Inst_DS__DS_GWS_SEMA_BR
34048
34049 Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
34050 {
34051 } // ~Inst_DS__DS_GWS_SEMA_BR
34052
34053 void
34054 Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
34055 {
34056 panicUnimplemented();
34057 }
34058
34059 Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
34060 : Inst_DS(iFmt, "ds_gws_sema_p")
34061 {
34062 } // Inst_DS__DS_GWS_SEMA_P
34063
34064 Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
34065 {
34066 } // ~Inst_DS__DS_GWS_SEMA_P
34067
34068 void
34069 Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
34070 {
34071 panicUnimplemented();
34072 }
34073
34074 Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
34075 : Inst_DS(iFmt, "ds_gws_barrier")
34076 {
34077 } // Inst_DS__DS_GWS_BARRIER
34078
34079 Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
34080 {
34081 } // ~Inst_DS__DS_GWS_BARRIER
34082
34083 void
34084 Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
34085 {
34086 panicUnimplemented();
34087 }
34088
34089 Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
34090 : Inst_DS(iFmt, "ds_consume")
34091 {
34092 } // Inst_DS__DS_CONSUME
34093
34094 Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
34095 {
34096 } // ~Inst_DS__DS_CONSUME
34097
34098 void
34099 Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
34100 {
34101 panicUnimplemented();
34102 }
34103
34104 Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
34105 : Inst_DS(iFmt, "ds_append")
34106 {
34107 } // Inst_DS__DS_APPEND
34108
34109 Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
34110 {
34111 } // ~Inst_DS__DS_APPEND
34112
34113 void
34114 Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
34115 {
34116 panicUnimplemented();
34117 }
34118
34119 Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
34120 : Inst_DS(iFmt, "ds_ordered_count")
34121 {
34122 } // Inst_DS__DS_ORDERED_COUNT
34123
34124 Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
34125 {
34126 } // ~Inst_DS__DS_ORDERED_COUNT
34127
34128 void
34129 Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
34130 {
34131 panicUnimplemented();
34132 }
34133
34134 Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
34135 : Inst_DS(iFmt, "ds_add_src2_u64")
34136 {
34137 } // Inst_DS__DS_ADD_SRC2_U64
34138
34139 Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
34140 {
34141 } // ~Inst_DS__DS_ADD_SRC2_U64
34142
34143 // A = ADDR_BASE;
34144 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34145 // {offset1[6],offset1[6:0],offset0});
34146 // MEM[A] = MEM[A] + MEM[B].
34147 void
34148 Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34149 {
34150 panicUnimplemented();
34151 }
34152
34153 Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
34154 : Inst_DS(iFmt, "ds_sub_src2_u64")
34155 {
34156 } // Inst_DS__DS_SUB_SRC2_U64
34157
34158 Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
34159 {
34160 } // ~Inst_DS__DS_SUB_SRC2_U64
34161
34162 // A = ADDR_BASE;
34163 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34164 // {offset1[6],offset1[6:0],offset0});
34165 // MEM[A] = MEM[A] - MEM[B].
34166 void
34167 Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34168 {
34169 panicUnimplemented();
34170 }
34171
34172 Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
34173 : Inst_DS(iFmt, "ds_rsub_src2_u64")
34174 {
34175 } // Inst_DS__DS_RSUB_SRC2_U64
34176
34177 Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
34178 {
34179 } // ~Inst_DS__DS_RSUB_SRC2_U64
34180
34181 // A = ADDR_BASE;
34182 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34183 // {offset1[6],offset1[6:0],offset0});
34184 // MEM[A] = MEM[B] - MEM[A].
34185 void
34186 Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34187 {
34188 panicUnimplemented();
34189 }
34190
34191 Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
34192 : Inst_DS(iFmt, "ds_inc_src2_u64")
34193 {
34194 } // Inst_DS__DS_INC_SRC2_U64
34195
34196 Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
34197 {
34198 } // ~Inst_DS__DS_INC_SRC2_U64
34199
34200 // A = ADDR_BASE;
34201 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34202 // {offset1[6],offset1[6:0],offset0});
34203 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
34204 void
34205 Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34206 {
34207 panicUnimplemented();
34208 }
34209
34210 Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
34211 : Inst_DS(iFmt, "ds_dec_src2_u64")
34212 {
34213 } // Inst_DS__DS_DEC_SRC2_U64
34214
34215 Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
34216 {
34217 } // ~Inst_DS__DS_DEC_SRC2_U64
34218
34219 // A = ADDR_BASE;
34220 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34221 // {offset1[6],offset1[6:0],offset0});
34222 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
34223 // Uint decrement.
34224 void
34225 Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34226 {
34227 panicUnimplemented();
34228 }
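    // Editor's sketch (not gem5 code): the wrapping update the inc/dec
    // pseudocode above describes, written out for 64-bit values; <cstdint>
    // is assumed and the helper names are illustrative only.
    static uint64_t
    wrappingInc(uint64_t val, uint64_t limit)
    {
        return val >= limit ? 0 : val + 1;   // wrap to 0 at the limit
    }

    static uint64_t
    wrappingDec(uint64_t val, uint64_t limit)
    {
        // reload the limit when decrementing from 0 or from above it
        return (val == 0 || val > limit) ? limit : val - 1;
    }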
34229
34230 Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
34231 : Inst_DS(iFmt, "ds_min_src2_i64")
34232 {
34233 } // Inst_DS__DS_MIN_SRC2_I64
34234
34235 Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
34236 {
34237 } // ~Inst_DS__DS_MIN_SRC2_I64
34238
34239 // A = ADDR_BASE;
34240 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34241 // {offset1[6],offset1[6:0],offset0});
34242 // MEM[A] = min(MEM[A], MEM[B]).
34243 void
34244 Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
34245 {
34246 panicUnimplemented();
34247 }
34248
34249 Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
34250 : Inst_DS(iFmt, "ds_max_src2_i64")
34251 {
34252 } // Inst_DS__DS_MAX_SRC2_I64
34253
34254 Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
34255 {
34256 } // ~Inst_DS__DS_MAX_SRC2_I64
34257
34258 // A = ADDR_BASE;
34259 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34260 // {offset1[6],offset1[6:0],offset0});
34261 // MEM[A] = max(MEM[A], MEM[B]).
34262 void
34263 Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
34264 {
34265 panicUnimplemented();
34266 }
34267
34268 Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
34269 : Inst_DS(iFmt, "ds_min_src2_u64")
34270 {
34271 } // Inst_DS__DS_MIN_SRC2_U64
34272
34273 Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
34274 {
34275 } // ~Inst_DS__DS_MIN_SRC2_U64
34276
34277 // A = ADDR_BASE;
34278 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34279 // {offset1[6],offset1[6:0],offset0});
34280 // MEM[A] = min(MEM[A], MEM[B]).
34281 void
34282 Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34283 {
34284 panicUnimplemented();
34285 }
34286
34287 Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
34288 : Inst_DS(iFmt, "ds_max_src2_u64")
34289 {
34290 } // Inst_DS__DS_MAX_SRC2_U64
34291
34292 Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
34293 {
34294 } // ~Inst_DS__DS_MAX_SRC2_U64
34295
34296 // A = ADDR_BASE;
34297 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34298 // {offset1[6],offset1[6:0],offset0});
34299 // MEM[A] = max(MEM[A], MEM[B]).
34300 void
34301 Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34302 {
34303 panicUnimplemented();
34304 }
34305
34306 Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt)
34307 : Inst_DS(iFmt, "ds_and_src2_b64")
34308 {
34309 } // Inst_DS__DS_AND_SRC2_B64
34310
34311 Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64()
34312 {
34313 } // ~Inst_DS__DS_AND_SRC2_B64
34314
34315 // A = ADDR_BASE;
34316 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34317 // {offset1[6],offset1[6:0],offset0});
34318 // MEM[A] = MEM[A] & MEM[B].
34319 void
34320 Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34321 {
34322 panicUnimplemented();
34323 }
34324
34325 Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt)
34326 : Inst_DS(iFmt, "ds_or_src2_b64")
34327 {
34328 } // Inst_DS__DS_OR_SRC2_B64
34329
34330 Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64()
34331 {
34332 } // ~Inst_DS__DS_OR_SRC2_B64
34333
34334 // A = ADDR_BASE;
34335 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34336 // {offset1[6],offset1[6:0],offset0});
34337 // MEM[A] = MEM[A] | MEM[B].
34338 void
34339 Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34340 {
34341 panicUnimplemented();
34342 }
34343
34344 Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt)
34345 : Inst_DS(iFmt, "ds_xor_src2_b64")
34346 {
34347 } // Inst_DS__DS_XOR_SRC2_B64
34348
34349 Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64()
34350 {
34351 } // ~Inst_DS__DS_XOR_SRC2_B64
34352
34353 // A = ADDR_BASE;
34354 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34355 // {offset1[6],offset1[6:0],offset0});
34356 // MEM[A] = MEM[A] ^ MEM[B].
34357 void
34358 Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34359 {
34360 panicUnimplemented();
34361 }
34362
34363 Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt)
34364 : Inst_DS(iFmt, "ds_write_src2_b64")
34365 {
34366 setFlag(MemoryRef);
34367 setFlag(Store);
34368 } // Inst_DS__DS_WRITE_SRC2_B64
34369
34370 Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64()
34371 {
34372 } // ~Inst_DS__DS_WRITE_SRC2_B64
34373
34374 // A = ADDR_BASE;
34375 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34376 // {offset1[6],offset1[6:0],offset0});
34377 // MEM[A] = MEM[B].
34378 // Write qword.
34379 void
34380 Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34381 {
34382 panicUnimplemented();
34383 }
34384
34385 Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt)
34386 : Inst_DS(iFmt, "ds_min_src2_f64")
34387 {
34388 setFlag(F64);
34389 } // Inst_DS__DS_MIN_SRC2_F64
34390
34391 Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64()
34392 {
34393 } // ~Inst_DS__DS_MIN_SRC2_F64
34394
34395 // A = ADDR_BASE;
34396 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34397 // {offset1[6],offset1[6:0],offset0});
34398 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
34399 void
34400 Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
34401 {
34402 panicUnimplemented();
34403 }
34404
34405 Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt)
34406 : Inst_DS(iFmt, "ds_max_src2_f64")
34407 {
34408 setFlag(F64);
34409 } // Inst_DS__DS_MAX_SRC2_F64
34410
34411 Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64()
34412 {
34413 } // ~Inst_DS__DS_MAX_SRC2_F64
34414
34415 // A = ADDR_BASE;
34416 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34417 // {offset1[6],offset1[6:0],offset0});
34418 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
34419 void
34420 Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
34421 {
34422 panicUnimplemented();
34423 }
34424
34425 Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt)
34426 : Inst_DS(iFmt, "ds_write_b96")
34427 {
34428 setFlag(MemoryRef);
34429 setFlag(Store);
34430 } // Inst_DS__DS_WRITE_B96
34431
34432 Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96()
34433 {
34434 } // ~Inst_DS__DS_WRITE_B96
34435
34436 // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
34437 // Tri-dword write.
34438 void
34439 Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
34440 {
34441 panicUnimplemented();
34442 }
34443
34444 Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
34445 : Inst_DS(iFmt, "ds_write_b128")
34446 {
34447 setFlag(MemoryRef);
34448 setFlag(Store);
34449 } // Inst_DS__DS_WRITE_B128
34450
34451 Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128()
34452 {
34453 } // ~Inst_DS__DS_WRITE_B128
34454
34455 // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
34456 // Quad-dword write.
34457 void
34458 Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
34459 {
34460 panicUnimplemented();
34461 }
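    // Editor's sketch (not gem5 code): the dword scatter the b96/b128
    // pseudocode above describes, with mem treated as a dword array indexed
    // by byte address / 4; <cstdint> is assumed, names are illustrative.
    static void
    writeDwords(uint32_t *mem, uint32_t addr, const uint32_t *data, int n)
    {
        // n == 3 for ds_write_b96 (DATA[95:0]), n == 4 for ds_write_b128
        // (DATA[127:0]); dword i lands at byte address addr + 4 * i.
        for (int i = 0; i < n; ++i)
            mem[addr / 4 + i] = data[i];
    }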
34462
34463 Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
34464 : Inst_DS(iFmt, "ds_read_b96")
34465 {
34466 setFlag(MemoryRef);
34467 setFlag(Load);
34468 } // Inst_DS__DS_READ_B96
34469
34470 Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96()
34471 {
34472 } // ~Inst_DS__DS_READ_B96
34473
34474 // Tri-dword read.
34475 void
34476 Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
34477 {
34478 panicUnimplemented();
34479 }
34480
34481 Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
34482 : Inst_DS(iFmt, "ds_read_b128")
34483 {
34484 setFlag(MemoryRef);
34485 setFlag(Load);
34486 } // Inst_DS__DS_READ_B128
34487
34488 Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128()
34489 {
34490 } // ~Inst_DS__DS_READ_B128
34491
34492 // Quad-dword read.
34493 void
34494 Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
34495 {
34496 panicUnimplemented();
34497 }
34498
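    // The buffer_{load,store}_format_* family below moves data through the
    // data-format conversion path; none of it is implemented here, and the
    // initiateAcc()/completeAcc() hooks are left as empty stubs.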
34499 Inst_MUBUF__BUFFER_LOAD_FORMAT_X
34500 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
34501 : Inst_MUBUF(iFmt, "buffer_load_format_x")
34502 {
34503 setFlag(MemoryRef);
34504 setFlag(Load);
34505 setFlag(GlobalSegment);
34506 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X
34507
34508 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
34509 {
34510 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X
34511
34512 // Untyped buffer load 1 dword with format conversion.
34513 void
34514 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
34515 {
34516 panicUnimplemented();
34517 }
34518
34519 void
34520 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
34521 {
34522 } // initiateAcc
34523
34524 void
34525 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
34526 {
34527 }
34528
34529 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
34530 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
34531 : Inst_MUBUF(iFmt, "buffer_load_format_xy")
34532 {
34533 setFlag(MemoryRef);
34534 setFlag(Load);
34535 setFlag(GlobalSegment);
34536 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
34537
34538 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
34539 {
34540 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
34541
34542 // Untyped buffer load 2 dwords with format conversion.
34543 void
34544 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
34545 {
34546 panicUnimplemented();
34547 }
34548
34549 void
34550 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
34551 {
34552 } // initiateAcc
34553
34554 void
34555 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
34556 {
34557 }
34558
34559 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
34560 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
34561 : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
34562 {
34563 setFlag(MemoryRef);
34564 setFlag(Load);
34565 setFlag(GlobalSegment);
34566 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
34567
34568 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
34569 {
34570 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
34571
34572 // Untyped buffer load 3 dwords with format conversion.
34573 void
34574 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
34575 {
34576 panicUnimplemented();
34577 }
34578
34579 void
34580 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
34581 {
34582 } // initiateAcc
34583
34584 void
34585 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
34586 {
34587 }
34588
34589 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
34590 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
34591 : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
34592 {
34593 setFlag(MemoryRef);
34594 setFlag(Load);
34595 setFlag(GlobalSegment);
34596 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
34597
34598 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
34599 {
34600 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
34601
34602 // Untyped buffer load 4 dwords with format conversion.
34603 void
34604 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
34605 {
34606 panicUnimplemented();
34607 }
34608
34609 void
34610 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
34611 {
34612 } // initiateAcc
34613
34614 void
34615 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
34616 {
34617 }
34618
34619 Inst_MUBUF__BUFFER_STORE_FORMAT_X
34620 ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
34621 : Inst_MUBUF(iFmt, "buffer_store_format_x")
34622 {
34623 setFlag(MemoryRef);
34624 setFlag(Store);
34625 setFlag(GlobalSegment);
34626 } // Inst_MUBUF__BUFFER_STORE_FORMAT_X
34627
34628 Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
34629 {
34630 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X
34631
34632 // Untyped buffer store 1 dword with format conversion.
34633 void
34634 Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
34635 {
34636 panicUnimplemented();
34637 }
34638
34639 void
34640 Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
34641 {
34642 } // initiateAcc
34643
34644 void
34645 Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
34646 {
34647 }
34648
34649 Inst_MUBUF__BUFFER_STORE_FORMAT_XY
34650 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
34651 : Inst_MUBUF(iFmt, "buffer_store_format_xy")
34652 {
34653 setFlag(MemoryRef);
34654 setFlag(Store);
34655 setFlag(GlobalSegment);
34656 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY
34657
34658 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
34659 {
34660 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY
34661
34662 // Untyped buffer store 2 dwords with format conversion.
34663 void
34664 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
34665 {
34666 panicUnimplemented();
34667 }
34668
34669 void
34670 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
34671 {
34672 } // initiateAcc
34673
34674 void
34675 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
34676 {
34677 }
34678
34679 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
34680 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
34681 : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
34682 {
34683 setFlag(MemoryRef);
34684 setFlag(Store);
34685 setFlag(GlobalSegment);
34686 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
34687
34688 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
34689 {
34690 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
34691
34692 // Untyped buffer store 3 dwords with format conversion.
34693 void
34694 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
34695 {
34696 panicUnimplemented();
34697 }
34698
34699 void
34700 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
34701 {
34702 } // initiateAcc
34703
34704 void
34705 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
34706 {
34707 }
34708
34709 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34710 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
34711 : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
34712 {
34713 setFlag(MemoryRef);
34714 setFlag(Store);
34715 setFlag(GlobalSegment);
34716 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34717
34718 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34719 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
34720 {
34721 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34722
34723 // Untyped buffer store 4 dwords with format conversion.
34724 void
34725 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
34726 {
34727 panicUnimplemented();
34728 }
34729
34730 void
34731 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
34732 {
34733 } // initiateAcc
34734
34735 void
34736 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
34737 {
34738 }
34739
34740 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34741 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
34742 : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
34743 {
34744 setFlag(MemoryRef);
34745 setFlag(Load);
34746 setFlag(GlobalSegment);
34747 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34748
34749 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34750 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
34751 {
34752 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34753
34754 // Untyped buffer load 1 dword with format conversion.
34755 void
34756 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
34757 {
34758 panicUnimplemented();
34759 }
34760
34761 void
34762 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
34763 {
34764 } // initiateAcc
34765
34766 void
34767 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
34768 {
34769 }
34770
34771 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34772 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
34773 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
34774 {
34775 setFlag(MemoryRef);
34776 setFlag(Load);
34777 setFlag(GlobalSegment);
34778 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34779
34780 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34781 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
34782 {
34783 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34784
34785 // Untyped buffer load 2 dwords with format conversion.
34786 void
34787 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
34788 {
34789 panicUnimplemented();
34790 }
34791
34792 void
34793 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
34794 GPUDynInstPtr gpuDynInst)
34795 {
34796 } // initiateAcc
34797
34798 void
34799 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
34800 GPUDynInstPtr gpuDynInst)
34801 {
34802 }
34803
34804 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34805 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
34806 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
34807 {
34808 setFlag(MemoryRef);
34809 setFlag(Load);
34810 setFlag(GlobalSegment);
34811 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34812
34813 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34814 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
34815 {
34816 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34817
34818 // Untyped buffer load 3 dwords with format conversion.
34819 void
34820 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
34821 {
34822 panicUnimplemented();
34823 }
34824
34825 void
34826 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
34827 GPUDynInstPtr gpuDynInst)
34828 {
34829 } // initiateAcc
34830
34831 void
34832 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
34833 GPUDynInstPtr gpuDynInst)
34834 {
34835 }
34836
34837 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34838 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
34839 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
34840 {
34841 setFlag(MemoryRef);
34842 setFlag(Load);
34843 setFlag(GlobalSegment);
34844 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34845
34846 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34847 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
34848 {
34849 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34850
34851 // Untyped buffer load 4 dwords with format conversion.
34852 void
34853 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
34854 {
34855 panicUnimplemented();
34856 }
34857
34858 void
34859 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
34860 GPUDynInstPtr gpuDynInst)
34861 {
34862 } // initiateAcc
34863
34864 void
34865 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
34866 GPUDynInstPtr gpuDynInst)
34867 {
34868 }
34869
34870 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34871 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
34872 : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
34873 {
34874 setFlag(MemoryRef);
34875 setFlag(Store);
34876 setFlag(GlobalSegment);
34877 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34878
34879 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34880 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
34881 {
34882 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34883
34884 // Untyped buffer store 1 dword with format conversion.
34885 void
34886 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
34887 {
34888 panicUnimplemented();
34889 }
34890
34891 void
34892 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
34893 GPUDynInstPtr gpuDynInst)
34894 {
34895 } // initiateAcc
34896
34897 void
34898 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
34899 GPUDynInstPtr gpuDynInst)
34900 {
34901 }
34902
34903 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34904 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
34905 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
34906 {
34907 setFlag(MemoryRef);
34908 setFlag(Store);
34909 setFlag(GlobalSegment);
34910 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34911
34912 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34913 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
34914 {
34915 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34916
34917 // Untyped buffer store 2 dwords with format conversion.
34918 void
34919 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
34920 {
34921 panicUnimplemented();
34922 }
34923
34924 void
34925 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
34926 GPUDynInstPtr gpuDynInst)
34927 {
34928 } // initiateAcc
34929
34930 void
34931 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
34932 GPUDynInstPtr gpuDynInst)
34933 {
34934 }
34935
34936 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34937 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
34938 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
34939 {
34940 setFlag(MemoryRef);
34941 setFlag(Store);
34942 setFlag(GlobalSegment);
34943 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34944
34945 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34946 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
34947 {
34948 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34949
34950 // Untyped buffer store 3 dwords with format conversion.
34951 void
34952 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
34953 {
34954 panicUnimplemented();
34955 }
34956
34957 void
34958 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
34959 GPUDynInstPtr gpuDynInst)
34960 {
34961 } // initiateAcc
34962
34963 void
34964 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
34965 GPUDynInstPtr gpuDynInst)
34966 {
34967 }
34968
34969 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34970 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
34971 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
34972 {
34973 setFlag(MemoryRef);
34974 setFlag(Store);
34975 setFlag(GlobalSegment);
34976 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34977
34978 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34979 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
34980 {
34981 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34982
34983 // Untyped buffer store 4 dwords with format conversion.
34984 void
34985 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
34986 {
34987 panicUnimplemented();
34988 }
34989
34990 void
34991 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
34992 GPUDynInstPtr gpuDynInst)
34993 {
34994 } // initiateAcc
34995
34996 void
34997 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
34998 GPUDynInstPtr gpuDynInst)
34999 {
35000 }
35001
35002 Inst_MUBUF__BUFFER_LOAD_UBYTE
35003 ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
35004 : Inst_MUBUF(iFmt, "buffer_load_ubyte")
35005 {
35006 setFlag(MemoryRef);
35007 setFlag(Load);
35008 if (instData.LDS) {
35009 setFlag(GroupSegment);
35010 } else {
35011 setFlag(GlobalSegment);
35012 }
35013 } // Inst_MUBUF__BUFFER_LOAD_UBYTE
35014
35015 Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
35016 {
35017 } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE
35018
35019 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
35020 void
35021 Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
35022 {
35023 Wavefront *wf = gpuDynInst->wavefront();
35024 gpuDynInst->execUnitId = wf->execUnitId;
35025 gpuDynInst->exec_mask = wf->execMask();
35026 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35027 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35028
35029 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35030 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
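        // SRSRC is encoded in units of four SGPRs, hence the * 4 below to
        // reach the first register of the 128-bit buffer resource (V#).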
35031 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35032 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35033
35034 rsrcDesc.read();
35035 offset.read();
35036
35037 int inst_offset = instData.OFFSET;
35038
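        // OFFEN puts a per-lane byte offset in VADDR and IDXEN a per-lane
        // index; with both set the index comes first, i.e. VADDR holds the
        // index and VADDR+1 the offset. calcAddr() wants the offset operand
        // before the index operand, which is why the arguments swap to
        // (addr1, addr0) as soon as IDXEN is set; only VGPRs that are
        // actually consumed get read.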
35039 if (!instData.IDXEN && !instData.OFFEN) {
35040 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35041 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35042 addr0, addr1, rsrcDesc, offset, inst_offset);
35043 } else if (!instData.IDXEN && instData.OFFEN) {
35044 addr0.read();
35045 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35046 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35047 addr0, addr1, rsrcDesc, offset, inst_offset);
35048 } else if (instData.IDXEN && !instData.OFFEN) {
35049 addr0.read();
35050 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35051 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35052 addr1, addr0, rsrcDesc, offset, inst_offset);
35053 } else {
35054 addr0.read();
35055 addr1.read();
35056 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35057 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35058 addr1, addr0, rsrcDesc, offset, inst_offset);
35059 }
35060
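        // Hand the request to the LDS or global memory pipeline and move
        // it from the wavefront's "in pipe" count to its outstanding
        // count; validateRequestCounters() then sanity-checks the totals.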
35061 if (isLocalMem()) {
35062 gpuDynInst->computeUnit()->localMemoryPipe
35063 .issueRequest(gpuDynInst);
35064 wf->rdLmReqsInPipe--;
35065 wf->outstandingReqsRdLm++;
35066 } else {
35067 gpuDynInst->computeUnit()->globalMemoryPipe
35068 .issueRequest(gpuDynInst);
35069 wf->rdGmReqsInPipe--;
35070 wf->outstandingReqsRdGm++;
35071 }
35072
35073 wf->outstandingReqs++;
35074 wf->validateRequestCounters();
35075 }
35076
35077 void
35078 Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
35079 {
35080 initMemRead<VecElemU8>(gpuDynInst);
35081 } // initiateAcc
35082
35083 void
35084 Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
35085 {
35086 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
35087
35088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35089 if (gpuDynInst->exec_mask[lane]) {
35090 if (!oobMask[lane]) {
35091 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
35092 gpuDynInst->d_data))[lane]);
35093 } else {
35094 vdst[lane] = 0;
35095 }
35096 }
35097 }
35098
35099 vdst.write();
35100 }
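    // Editor's note: completeAcc() above zero extends each returned byte
    // into its 32-bit VGPR element (0xff becomes 0x000000ff), and lanes
    // flagged out of bounds in oobMask by the address calculation are
    // forced to 0 instead of whatever the buffer returned.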
35101
35102
35103 Inst_MUBUF__BUFFER_LOAD_SBYTE
35104 ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
35105 : Inst_MUBUF(iFmt, "buffer_load_sbyte")
35106 {
35107 setFlag(MemoryRef);
35108 setFlag(Load);
35109 setFlag(GlobalSegment);
35110 } // Inst_MUBUF__BUFFER_LOAD_SBYTE
35111
35112 Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
35113 {
35114 } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE
35115
35116 // Untyped buffer load signed byte (sign extend to VGPR destination).
35117 void
35118 Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
35119 {
35120 panicUnimplemented();
35121 }
35122
35123 void
35124 Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
35125 {
35126 } // initiateAcc
35127
35128 void
35129 Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
35130 {
35131 }
35132
35133 Inst_MUBUF__BUFFER_LOAD_USHORT
35134 ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
35135 : Inst_MUBUF(iFmt, "buffer_load_ushort")
35136 {
35137 setFlag(MemoryRef);
35138 setFlag(Load);
35139 if (instData.LDS) {
35140 setFlag(GroupSegment);
35141 } else {
35142 setFlag(GlobalSegment);
35143 }
35144 } // Inst_MUBUF__BUFFER_LOAD_USHORT
35145
35146 Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
35147 {
35148 } // ~Inst_MUBUF__BUFFER_LOAD_USHORT
35149
35150 // Untyped buffer load unsigned short (zero extend to VGPR destination).
35151 void
35152 Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
35153 {
35154 Wavefront *wf = gpuDynInst->wavefront();
35155 gpuDynInst->execUnitId = wf->execUnitId;
35156 gpuDynInst->exec_mask = wf->execMask();
35157 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35158 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35159
35160 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35161 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35162 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35163 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35164
35165 rsrcDesc.read();
35166 offset.read();
35167
35168 int inst_offset = instData.OFFSET;
35169
35170 if (!instData.IDXEN && !instData.OFFEN) {
35171 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35172 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35173 addr0, addr1, rsrcDesc, offset, inst_offset);
35174 } else if (!instData.IDXEN && instData.OFFEN) {
35175 addr0.read();
35176 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35177 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35178 addr0, addr1, rsrcDesc, offset, inst_offset);
35179 } else if (instData.IDXEN && !instData.OFFEN) {
35180 addr0.read();
35181 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35182 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35183 addr1, addr0, rsrcDesc, offset, inst_offset);
35184 } else {
35185 addr0.read();
35186 addr1.read();
35187 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35188 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35189 addr1, addr0, rsrcDesc, offset, inst_offset);
35190 }
35191
35192 if (isLocalMem()) {
35193 gpuDynInst->computeUnit()->localMemoryPipe
35194 .issueRequest(gpuDynInst);
35195 wf->rdLmReqsInPipe--;
35196 wf->outstandingReqsRdLm++;
35197 } else {
35198 gpuDynInst->computeUnit()->globalMemoryPipe
35199 .issueRequest(gpuDynInst);
35200 wf->rdGmReqsInPipe--;
35201 wf->outstandingReqsRdGm++;
35202 }
35203
35204 wf->outstandingReqs++;
35205 wf->validateRequestCounters();
35206 }
35207
35208 void
35209 Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
35210 {
35211 initMemRead<VecElemU16>(gpuDynInst);
35212 } // initiateAcc
35213
35214 void
35215 Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
35216 {
35217 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
35218
35219 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35220 if (gpuDynInst->exec_mask[lane]) {
35221 if (!oobMask[lane]) {
35222 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
35223 gpuDynInst->d_data))[lane]);
35224 } else {
35225 vdst[lane] = 0;
35226 }
35227 }
35228 }
35229
35230 vdst.write();
35231 }
35232
35233
35234 Inst_MUBUF__BUFFER_LOAD_SSHORT
35235 ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
35236 : Inst_MUBUF(iFmt, "buffer_load_sshort")
35237 {
35238 setFlag(MemoryRef);
35239 setFlag(Load);
35240 setFlag(GlobalSegment);
35241 } // Inst_MUBUF__BUFFER_LOAD_SSHORT
35242
35243 Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
35244 {
35245 } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT
35246
35247 // Untyped buffer load signed short (sign extend to VGPR destination).
35248 void
35249 Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
35250 {
35251 panicUnimplemented();
35252 }
35253
35254 void
35255 Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
35256 {
35257 } // initiateAcc
35258
35259 void
35260 Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
35261 {
35262 }
35263
35264 Inst_MUBUF__BUFFER_LOAD_DWORD
35265 ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
35266 : Inst_MUBUF(iFmt, "buffer_load_dword")
35267 {
35268 setFlag(MemoryRef);
35269 setFlag(Load);
35270 if (instData.LDS) {
35271 setFlag(GroupSegment);
35272 } else {
35273 setFlag(GlobalSegment);
35274 }
35275 } // Inst_MUBUF__BUFFER_LOAD_DWORD
35276
35277 Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
35278 {
35279 } // ~Inst_MUBUF__BUFFER_LOAD_DWORD
35280
35281 // Untyped buffer load dword.
35282 void
35283 Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
35284 {
35285 Wavefront *wf = gpuDynInst->wavefront();
35286 gpuDynInst->execUnitId = wf->execUnitId;
35287 gpuDynInst->exec_mask = wf->execMask();
35288 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35289 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35290
35291 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35292 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35293 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35294 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35295
35296 rsrcDesc.read();
35297 offset.read();
35298
35299 int inst_offset = instData.OFFSET;
35300
35301 if (!instData.IDXEN && !instData.OFFEN) {
35302 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35303 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35304 addr0, addr1, rsrcDesc, offset, inst_offset);
35305 } else if (!instData.IDXEN && instData.OFFEN) {
35306 addr0.read();
35307 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35308 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35309 addr0, addr1, rsrcDesc, offset, inst_offset);
35310 } else if (instData.IDXEN && !instData.OFFEN) {
35311 addr0.read();
35312 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35313 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35314 addr1, addr0, rsrcDesc, offset, inst_offset);
35315 } else {
35316 addr0.read();
35317 addr1.read();
35318 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35319 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35320 addr1, addr0, rsrcDesc, offset, inst_offset);
35321 }
35322
35323 if (isLocalMem()) {
35324 gpuDynInst->computeUnit()->localMemoryPipe
35325 .issueRequest(gpuDynInst);
35326 wf->rdLmReqsInPipe--;
35327 wf->outstandingReqsRdLm++;
35328 } else {
35329 gpuDynInst->computeUnit()->globalMemoryPipe
35330 .issueRequest(gpuDynInst);
35331 wf->rdGmReqsInPipe--;
35332 wf->outstandingReqsRdGm++;
35333 }
35334
35335 wf->outstandingReqs++;
35336 wf->validateRequestCounters();
35337 }
35338
35339 void
35340 Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
35341 {
35342 initMemRead<VecElemU32>(gpuDynInst);
35343 } // initiateAcc
35344
35345 void
35346 Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
35347 {
35348 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
35349
35350 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35351 if (gpuDynInst->exec_mask[lane]) {
35352 if (!oobMask[lane]) {
35353 vdst[lane] = (reinterpret_cast<VecElemU32*>(
35354 gpuDynInst->d_data))[lane];
35355 } else {
35356 vdst[lane] = 0;
35357 }
35358 }
35359 }
35360
35361 vdst.write();
35362 } // completeAcc
35363
35364 Inst_MUBUF__BUFFER_LOAD_DWORDX2
35365 ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
35366 : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
35367 {
35368 setFlag(MemoryRef);
35369 setFlag(Load);
35370 if (instData.LDS) {
35371 setFlag(GroupSegment);
35372 } else {
35373 setFlag(GlobalSegment);
35374 }
35375 } // Inst_MUBUF__BUFFER_LOAD_DWORDX2
35376
35377 Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
35378 {
35379 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2
35380
35381 // Untyped buffer load 2 dwords.
35382 void
35383 Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
35384 {
35385 Wavefront *wf = gpuDynInst->wavefront();
35386 gpuDynInst->execUnitId = wf->execUnitId;
35387 gpuDynInst->exec_mask = wf->execMask();
35388 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35389 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35390
35391 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35392 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35393 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35394 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35395
35396 rsrcDesc.read();
35397 offset.read();
35398
35399 int inst_offset = instData.OFFSET;
35400
35401 if (!instData.IDXEN && !instData.OFFEN) {
35402 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35403 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35404 addr0, addr1, rsrcDesc, offset, inst_offset);
35405 } else if (!instData.IDXEN && instData.OFFEN) {
35406 addr0.read();
35407 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35408 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35409 addr0, addr1, rsrcDesc, offset, inst_offset);
35410 } else if (instData.IDXEN && !instData.OFFEN) {
35411 addr0.read();
35412 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35413 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35414 addr1, addr0, rsrcDesc, offset, inst_offset);
35415 } else {
35416 addr0.read();
35417 addr1.read();
35418 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35419 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35420 addr1, addr0, rsrcDesc, offset, inst_offset);
35421 }
35422
35423 if (isLocalMem()) {
35424 gpuDynInst->computeUnit()->localMemoryPipe
35425 .issueRequest(gpuDynInst);
35426 wf->rdLmReqsInPipe--;
35427 wf->outstandingReqsRdLm++;
35428 } else {
35429 gpuDynInst->computeUnit()->globalMemoryPipe
35430 .issueRequest(gpuDynInst);
35431 wf->rdGmReqsInPipe--;
35432 wf->outstandingReqsRdGm++;
35433 }
35434
35435 wf->outstandingReqs++;
35436 wf->validateRequestCounters();
35437 } // execute
35438
35439 void
35440 Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
35441 {
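        // Here the template argument is the number of dwords fetched per
        // lane, not an element type as in the narrower loads above.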
35442 initMemRead<2>(gpuDynInst);
35443 } // initiateAcc
35444
35445 void
35446 Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
35447 {
35448 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
35449 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
35450
35451 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35452 if (gpuDynInst->exec_mask[lane]) {
35453 if (!oobMask[lane]) {
35454 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
35455 gpuDynInst->d_data))[lane * 2];
35456 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
35457 gpuDynInst->d_data))[lane * 2 + 1];
35458 } else {
35459 vdst0[lane] = 0;
35460 vdst1[lane] = 0;
35461 }
35462 }
35463 }
35464
35465 vdst0.write();
35466 vdst1.write();
35467 } // completeAcc
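    // Editor's note: the dwordx2/x3/x4 completeAcc() loops assume d_data
    // is laid out lane-major, the N dwords of a lane sitting contiguously
    // at index lane * N + i; for dwordx2, lane 0 owns indices {0, 1},
    // lane 1 owns {2, 3}, and so on.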
35468
35469 Inst_MUBUF__BUFFER_LOAD_DWORDX3
35470 ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
35471 : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
35472 {
35473 setFlag(MemoryRef);
35474 setFlag(Load);
35475 if (instData.LDS) {
35476 setFlag(GroupSegment);
35477 } else {
35478 setFlag(GlobalSegment);
35479 }
35480 } // Inst_MUBUF__BUFFER_LOAD_DWORDX3
35481
35482 Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
35483 {
35484 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3
35485
35486 // Untyped buffer load 3 dwords.
35487 void
35488 Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
35489 {
35490 Wavefront *wf = gpuDynInst->wavefront();
35491 gpuDynInst->execUnitId = wf->execUnitId;
35492 gpuDynInst->exec_mask = wf->execMask();
35493 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35494 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35495
35496 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35497 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35498 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35499 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35500
35501 rsrcDesc.read();
35502 offset.read();
35503
35504 int inst_offset = instData.OFFSET;
35505
35506 if (!instData.IDXEN && !instData.OFFEN) {
35507 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35508 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35509 addr0, addr1, rsrcDesc, offset, inst_offset);
35510 } else if (!instData.IDXEN && instData.OFFEN) {
35511 addr0.read();
35512 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35513 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35514 addr0, addr1, rsrcDesc, offset, inst_offset);
35515 } else if (instData.IDXEN && !instData.OFFEN) {
35516 addr0.read();
35517 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35518 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35519 addr1, addr0, rsrcDesc, offset, inst_offset);
35520 } else {
35521 addr0.read();
35522 addr1.read();
35523 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35524 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35525 addr1, addr0, rsrcDesc, offset, inst_offset);
35526 }
35527
35528 if (isLocalMem()) {
35529 gpuDynInst->computeUnit()->localMemoryPipe
35530 .issueRequest(gpuDynInst);
35531 wf->rdLmReqsInPipe--;
35532 wf->outstandingReqsRdLm++;
35533 } else {
35534 gpuDynInst->computeUnit()->globalMemoryPipe
35535 .issueRequest(gpuDynInst);
35536 wf->rdGmReqsInPipe--;
35537 wf->outstandingReqsRdGm++;
35538 }
35539
35540 wf->outstandingReqs++;
35541 wf->validateRequestCounters();
35542 } // execute
35543
35544 void
35545 Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
35546 {
35547 initMemRead<3>(gpuDynInst);
35548 } // initiateAcc
35549
35550 void
35551 Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
35552 {
35553 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
35554 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
35555 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
35556
35557 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35558 if (gpuDynInst->exec_mask[lane]) {
35559 if (!oobMask[lane]) {
35560 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
35561 gpuDynInst->d_data))[lane * 3];
35562 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
35563 gpuDynInst->d_data))[lane * 3 + 1];
35564 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
35565 gpuDynInst->d_data))[lane * 3 + 2];
35566 } else {
35567 vdst0[lane] = 0;
35568 vdst1[lane] = 0;
35569 vdst2[lane] = 0;
35570 }
35571 }
35572 }
35573
35574 vdst0.write();
35575 vdst1.write();
35576 vdst2.write();
35577 } // completeAcc
35578
35579 Inst_MUBUF__BUFFER_LOAD_DWORDX4
35580 ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
35581 : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
35582 {
35583 setFlag(MemoryRef);
35584 setFlag(Load);
35585 if (instData.LDS) {
35586 setFlag(GroupSegment);
35587 } else {
35588 setFlag(GlobalSegment);
35589 }
35590 } // Inst_MUBUF__BUFFER_LOAD_DWORDX4
35591
35592 Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
35593 {
35594 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4
35595
35596 // Untyped buffer load 4 dwords.
35597 void
35598 Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
35599 {
35600 Wavefront *wf = gpuDynInst->wavefront();
35601 gpuDynInst->execUnitId = wf->execUnitId;
35602 gpuDynInst->exec_mask = wf->execMask();
35603 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35604 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35605
35606 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35607 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35608 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35609 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35610
35611 rsrcDesc.read();
35612 offset.read();
35613
35614 int inst_offset = instData.OFFSET;
35615
35616 if (!instData.IDXEN && !instData.OFFEN) {
35617 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35618 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35619 addr0, addr1, rsrcDesc, offset, inst_offset);
35620 } else if (!instData.IDXEN && instData.OFFEN) {
35621 addr0.read();
35622 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35623 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35624 addr0, addr1, rsrcDesc, offset, inst_offset);
35625 } else if (instData.IDXEN && !instData.OFFEN) {
35626 addr0.read();
35627 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35628 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35629 addr1, addr0, rsrcDesc, offset, inst_offset);
35630 } else {
35631 addr0.read();
35632 addr1.read();
35633 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35634 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35635 addr1, addr0, rsrcDesc, offset, inst_offset);
35636 }
35637
35638 if (isLocalMem()) {
35639 gpuDynInst->computeUnit()->localMemoryPipe
35640 .issueRequest(gpuDynInst);
35641 wf->rdLmReqsInPipe--;
35642 wf->outstandingReqsRdLm++;
35643 } else {
35644 gpuDynInst->computeUnit()->globalMemoryPipe
35645 .issueRequest(gpuDynInst);
35646 wf->rdGmReqsInPipe--;
35647 wf->outstandingReqsRdGm++;
35648 }
35649
35650 wf->outstandingReqs++;
35651 wf->validateRequestCounters();
35652 } // execute
35653
35654 void
35655 Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
35656 {
35657 initMemRead<4>(gpuDynInst);
35658 } // initiateAcc
35659
35660 void
35661 Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
35662 {
35663 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
35664 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
35665 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
35666 VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);
35667
35668 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35669 if (gpuDynInst->exec_mask[lane]) {
35670 if (!oobMask[lane]) {
35671 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
35672 gpuDynInst->d_data))[lane * 4];
35673 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
35674 gpuDynInst->d_data))[lane * 4 + 1];
35675 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
35676 gpuDynInst->d_data))[lane * 4 + 2];
35677 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
35678 gpuDynInst->d_data))[lane * 4 + 3];
35679 } else {
35680 vdst0[lane] = 0;
35681 vdst1[lane] = 0;
35682 vdst2[lane] = 0;
35683 vdst3[lane] = 0;
35684 }
35685 }
35686 }
35687
35688 vdst0.write();
35689 vdst1.write();
35690 vdst2.write();
35691 vdst3.write();
35692 } // completeAcc
35693
35694 Inst_MUBUF__BUFFER_STORE_BYTE
35695 ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
35696 : Inst_MUBUF(iFmt, "buffer_store_byte")
35697 {
35698 setFlag(MemoryRef);
35699 setFlag(Store);
35700 if (instData.LDS) {
35701 setFlag(GroupSegment);
35702 } else {
35703 setFlag(GlobalSegment);
35704 }
35705 } // Inst_MUBUF__BUFFER_STORE_BYTE
35706
35707 Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
35708 {
35709 } // ~Inst_MUBUF__BUFFER_STORE_BYTE
35710
35711 // Untyped buffer store byte.
35712 void
35713 Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
35714 {
35715 Wavefront *wf = gpuDynInst->wavefront();
35716 gpuDynInst->execUnitId = wf->execUnitId;
35717 gpuDynInst->exec_mask = wf->execMask();
35718 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35719 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35720
35721 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35722 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35723 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35724 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35725
35726 rsrcDesc.read();
35727 offset.read();
35728
35729 int inst_offset = instData.OFFSET;
35730
35731 if (!instData.IDXEN && !instData.OFFEN) {
35732 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35733 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35734 addr0, addr1, rsrcDesc, offset, inst_offset);
35735 } else if (!instData.IDXEN && instData.OFFEN) {
35736 addr0.read();
35737 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35738 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35739 addr0, addr1, rsrcDesc, offset, inst_offset);
35740 } else if (instData.IDXEN && !instData.OFFEN) {
35741 addr0.read();
35742 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35743 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35744 addr1, addr0, rsrcDesc, offset, inst_offset);
35745 } else {
35746 addr0.read();
35747 addr1.read();
35748 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35749 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35750 addr1, addr0, rsrcDesc, offset, inst_offset);
35751 }
35752
35753 if (isLocalMem()) {
35754 gpuDynInst->computeUnit()->localMemoryPipe
35755 .issueRequest(gpuDynInst);
35756 wf->wrLmReqsInPipe--;
35757 wf->outstandingReqsWrLm++;
35758 } else {
35759 gpuDynInst->computeUnit()->globalMemoryPipe
35760 .issueRequest(gpuDynInst);
35761 wf->wrGmReqsInPipe--;
35762 wf->outstandingReqsWrGm++;
35763 }
35764
35765 wf->outstandingReqs++;
35766 wf->validateRequestCounters();
35767 }
35768
35769 void
35770 Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
35771 {
35772 ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
35773 data.read();
35774
35775 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35776 if (gpuDynInst->exec_mask[lane]) {
35777 (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
35778 = data[lane];
35779 }
35780 }
35781
35782 initMemWrite<VecElemI8>(gpuDynInst);
35783 } // initiateAcc
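    // Editor's note: stores mirror the load path in reverse; active lanes
    // are first gathered from the VGPR into the d_data staging buffer, and
    // only then does initMemWrite<T>() issue the write, so inactive lanes
    // contribute nothing to it.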
35784
35785 void
35786 Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
35787 {
35788 }
35789
35790 Inst_MUBUF__BUFFER_STORE_SHORT
35791 ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
35792 : Inst_MUBUF(iFmt, "buffer_store_short")
35793 {
35794 setFlag(MemoryRef);
35795 setFlag(Store);
35796 if (instData.LDS) {
35797 setFlag(GroupSegment);
35798 } else {
35799 setFlag(GlobalSegment);
35800 }
35801 } // Inst_MUBUF__BUFFER_STORE_SHORT
35802
35803 Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
35804 {
35805 } // ~Inst_MUBUF__BUFFER_STORE_SHORT
35806
35807 // Untyped buffer store short.
35808 void
35809 Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
35810 {
35811 Wavefront *wf = gpuDynInst->wavefront();
35812 gpuDynInst->execUnitId = wf->execUnitId;
35813 gpuDynInst->exec_mask = wf->execMask();
35814 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35815 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35816
35817 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35818 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35819 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35820 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35821
35822 rsrcDesc.read();
35823 offset.read();
35824
35825 int inst_offset = instData.OFFSET;
35826
35827 if (!instData.IDXEN && !instData.OFFEN) {
35828 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35829 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35830 addr0, addr1, rsrcDesc, offset, inst_offset);
35831 } else if (!instData.IDXEN && instData.OFFEN) {
35832 addr0.read();
35833 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35834 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35835 addr0, addr1, rsrcDesc, offset, inst_offset);
35836 } else if (instData.IDXEN && !instData.OFFEN) {
35837 addr0.read();
35838 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35839 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35840 addr1, addr0, rsrcDesc, offset, inst_offset);
35841 } else {
35842 addr0.read();
35843 addr1.read();
35844 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35845 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35846 addr1, addr0, rsrcDesc, offset, inst_offset);
35847 }
35848
35849 if (isLocalMem()) {
35850 gpuDynInst->computeUnit()->localMemoryPipe
35851 .issueRequest(gpuDynInst);
35852 wf->wrLmReqsInPipe--;
35853 wf->outstandingReqsWrLm++;
35854 } else {
35855 gpuDynInst->computeUnit()->globalMemoryPipe
35856 .issueRequest(gpuDynInst);
35857 wf->wrGmReqsInPipe--;
35858 wf->outstandingReqsWrGm++;
35859 }
35860
35861 wf->outstandingReqs++;
35862 wf->validateRequestCounters();
35863 }
35864
35865 void
35866 Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
35867 {
35868 ConstVecOperandI16 data(gpuDynInst, extData.VDATA);
35869 data.read();
35870
35871 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35872 if (gpuDynInst->exec_mask[lane]) {
35873 (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
35874 = data[lane];
35875 }
35876 }
35877
35878 initMemWrite<VecElemI16>(gpuDynInst);
35879 } // initiateAcc
35880
35881 void
35882 Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
35883 {
35884 }
35885
35886 Inst_MUBUF__BUFFER_STORE_DWORD::
35887 Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt)
35888 : Inst_MUBUF(iFmt, "buffer_store_dword")
35889 {
35890 setFlag(MemoryRef);
35891 setFlag(Store);
35892 if (instData.LDS) {
35893 setFlag(GroupSegment);
35894 } else {
35895 setFlag(GlobalSegment);
35896 }
35897 } // Inst_MUBUF__BUFFER_STORE_DWORD
35898
35899 Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
35900 {
35901 } // ~Inst_MUBUF__BUFFER_STORE_DWORD
35902
35903 // Untyped buffer store dword.
35904 void
35905 Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
35906 {
35907 Wavefront *wf = gpuDynInst->wavefront();
35908 gpuDynInst->execUnitId = wf->execUnitId;
35909 gpuDynInst->exec_mask = wf->execMask();
35910 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35911 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35912
35913 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35914 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35915 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35916 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35917
35918 rsrcDesc.read();
35919 offset.read();
35920
35921 int inst_offset = instData.OFFSET;
35922
35923 if (!instData.IDXEN && !instData.OFFEN) {
35924 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35925 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35926 addr0, addr1, rsrcDesc, offset, inst_offset);
35927 } else if (!instData.IDXEN && instData.OFFEN) {
35928 addr0.read();
35929 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35930 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35931 addr0, addr1, rsrcDesc, offset, inst_offset);
35932 } else if (instData.IDXEN && !instData.OFFEN) {
35933 addr0.read();
35934 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35935 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35936 addr1, addr0, rsrcDesc, offset, inst_offset);
35937 } else {
35938 addr0.read();
35939 addr1.read();
35940 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35941 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35942 addr1, addr0, rsrcDesc, offset, inst_offset);
35943 }
35944
35945 if (isLocalMem()) {
35946 gpuDynInst->computeUnit()->localMemoryPipe
35947 .issueRequest(gpuDynInst);
35948 wf->wrLmReqsInPipe--;
35949 wf->outstandingReqsWrLm++;
35950 } else {
35951 gpuDynInst->computeUnit()->globalMemoryPipe
35952 .issueRequest(gpuDynInst);
35953 wf->wrGmReqsInPipe--;
35954 wf->outstandingReqsWrGm++;
35955 }
35956
35957 wf->outstandingReqs++;
35958 wf->validateRequestCounters();
35959 }
35960
35961 void
35962 Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
35963 {
35964 ConstVecOperandU32 data(gpuDynInst, extData.VDATA);
35965 data.read();
35966
35967 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35968 if (gpuDynInst->exec_mask[lane]) {
35969 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
35970 = data[lane];
35971 }
35972 }
35973
35974 initMemWrite<VecElemU32>(gpuDynInst);
35975 } // initiateAcc
35976
35977 void
35978 Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
35979 {
35980 } // completeAcc
35981
35982 Inst_MUBUF__BUFFER_STORE_DWORDX2
35983 ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
35984 : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
35985 {
35986 setFlag(MemoryRef);
35987 setFlag(Store);
35988 if (instData.LDS) {
35989 setFlag(GroupSegment);
35990 } else {
35991 setFlag(GlobalSegment);
35992 }
35993 } // Inst_MUBUF__BUFFER_STORE_DWORDX2
35994
35995 Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
35996 {
35997 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2
35998
35999 // Untyped buffer store 2 dwords.
36000 void
36001 Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
36002 {
36003 Wavefront *wf = gpuDynInst->wavefront();
36004 gpuDynInst->execUnitId = wf->execUnitId;
36005 gpuDynInst->exec_mask = wf->execMask();
36006 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36007 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36008
36009 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
36010 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
36011 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
36012 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
36013 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
36014 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
36015
36016 rsrcDesc.read();
36017 offset.read();
36018 data0.read();
36019 data1.read();
36020
36021 int inst_offset = instData.OFFSET;
36022
36023 if (!instData.IDXEN && !instData.OFFEN) {
36024 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36025 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36026 addr0, addr1, rsrcDesc, offset, inst_offset);
36027 } else if (!instData.IDXEN && instData.OFFEN) {
36028 addr0.read();
36029 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36030 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36031 addr0, addr1, rsrcDesc, offset, inst_offset);
36032 } else if (instData.IDXEN && !instData.OFFEN) {
36033 addr0.read();
36034 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36035 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36036 addr1, addr0, rsrcDesc, offset, inst_offset);
36037 } else {
36038 addr0.read();
36039 addr1.read();
36040 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36041 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36042 addr1, addr0, rsrcDesc, offset, inst_offset);
36043 }
36044
36045 if (isLocalMem()) {
36046 gpuDynInst->computeUnit()->localMemoryPipe
36047 .issueRequest(gpuDynInst);
36048 wf->wrLmReqsInPipe--;
36049 wf->outstandingReqsWrLm++;
36050 } else {
36051 gpuDynInst->computeUnit()->globalMemoryPipe
36052 .issueRequest(gpuDynInst);
36053 wf->wrGmReqsInPipe--;
36054 wf->outstandingReqsWrGm++;
36055 }
36056
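        // Stage the two dwords of each active lane contiguously (stride 2)
        // so the layout matches what initMemWrite<2>() writes out per lane.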
36057 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
36058 if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*2 + 1]
                    = data1[lane];
36063 }
36064 }
36065
36066 wf->outstandingReqs++;
36067 wf->validateRequestCounters();
36068 } // execute
36069
36070 void
36071 Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
36072 {
36073 initMemWrite<2>(gpuDynInst);
36074 } // initiateAcc
36075
36076 void
36077 Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
36078 {
36079 } // completeAcc
36080
36081 Inst_MUBUF__BUFFER_STORE_DWORDX3
36082 ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
36083 : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
36084 {
36085 setFlag(MemoryRef);
36086 setFlag(Store);
36087 if (instData.LDS) {
36088 setFlag(GroupSegment);
36089 } else {
36090 setFlag(GlobalSegment);
36091 }
36092 } // Inst_MUBUF__BUFFER_STORE_DWORDX3
36093
36094 Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
36095 {
36096 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3
36097
36098 // Untyped buffer store 3 dwords.
36099 void
36100 Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
36101 {
36102 Wavefront *wf = gpuDynInst->wavefront();
36103 gpuDynInst->execUnitId = wf->execUnitId;
36104 gpuDynInst->exec_mask = wf->execMask();
36105 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36106 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36107
36108 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
36109 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
36110 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
36111 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
36112 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
36113 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
36114 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
36115
36116 rsrcDesc.read();
36117 offset.read();
36118 data0.read();
36119 data1.read();
36120 data2.read();
36121
36122 int inst_offset = instData.OFFSET;
36123
36124 if (!instData.IDXEN && !instData.OFFEN) {
36125 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36126 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36127 addr0, addr1, rsrcDesc, offset, inst_offset);
36128 } else if (!instData.IDXEN && instData.OFFEN) {
36129 addr0.read();
36130 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36131 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36132 addr0, addr1, rsrcDesc, offset, inst_offset);
36133 } else if (instData.IDXEN && !instData.OFFEN) {
36134 addr0.read();
36135 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36136 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36137 addr1, addr0, rsrcDesc, offset, inst_offset);
36138 } else {
36139 addr0.read();
36140 addr1.read();
36141 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36142 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36143 addr1, addr0, rsrcDesc, offset, inst_offset);
36144 }
36145
36146 if (isLocalMem()) {
36147 gpuDynInst->computeUnit()->localMemoryPipe
36148 .issueRequest(gpuDynInst);
36149 wf->wrLmReqsInPipe--;
36150 wf->outstandingReqsWrLm++;
36151 } else {
36152 gpuDynInst->computeUnit()->globalMemoryPipe
36153 .issueRequest(gpuDynInst);
36154 wf->wrGmReqsInPipe--;
36155 wf->outstandingReqsWrGm++;
36156 }
36157
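        // As with dwordx2 above, stage three dwords per active lane
        // (stride 3) to match initMemWrite<3>().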
36158 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
36159 if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 3]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 2]
                    = data2[lane];
36166 }
36167 }
36168
36169 wf->outstandingReqs++;
36170 wf->validateRequestCounters();
36171 } // execute
36172
36173 void
36174 Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
36175 {
36176 initMemWrite<3>(gpuDynInst);
36177 } // initiateAcc
36178
36179 void
36180 Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
36181 {
36182 } // completeAcc
36183
36184 Inst_MUBUF__BUFFER_STORE_DWORDX4
36185 ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
36186 : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
36187 {
36188 setFlag(MemoryRef);
36189 setFlag(Store);
36190 if (instData.LDS) {
36191 setFlag(GroupSegment);
36192 } else {
36193 setFlag(GlobalSegment);
36194 }
36195 } // Inst_MUBUF__BUFFER_STORE_DWORDX4
36196
36197 Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
36198 {
36199 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4
36200
36201 // Untyped buffer store 4 dwords.
36202 void
36203 Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
36204 {
36205 Wavefront *wf = gpuDynInst->wavefront();
36206 gpuDynInst->execUnitId = wf->execUnitId;
36207 gpuDynInst->exec_mask = wf->execMask();
36208 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36209 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36210
36211 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
36212 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
36213 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
36214 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
36215 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
36216 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
36217 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
36218 ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);
36219
36220 rsrcDesc.read();
36221 offset.read();
36222 data0.read();
36223 data1.read();
36224 data2.read();
36225 data3.read();
36226
36227 int inst_offset = instData.OFFSET;
36228
36229 if (!instData.IDXEN && !instData.OFFEN) {
36230 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36231 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36232 addr0, addr1, rsrcDesc, offset, inst_offset);
36233 } else if (!instData.IDXEN && instData.OFFEN) {
36234 addr0.read();
36235 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36236 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36237 addr0, addr1, rsrcDesc, offset, inst_offset);
36238 } else if (instData.IDXEN && !instData.OFFEN) {
36239 addr0.read();
36240 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36241 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36242 addr1, addr0, rsrcDesc, offset, inst_offset);
36243 } else {
36244 addr0.read();
36245 addr1.read();
36246 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36247 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36248 addr1, addr0, rsrcDesc, offset, inst_offset);
36249 }
36250
36251 if (isLocalMem()) {
36252 gpuDynInst->computeUnit()->localMemoryPipe
36253 .issueRequest(gpuDynInst);
36254 wf->wrLmReqsInPipe--;
36255 wf->outstandingReqsWrLm++;
36256 } else {
36257 gpuDynInst->computeUnit()->globalMemoryPipe
36258 .issueRequest(gpuDynInst);
36259 wf->wrGmReqsInPipe--;
36260 wf->outstandingReqsWrGm++;
36261 }
36262
36263 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
36264 if (gpuDynInst->exec_mask[lane]) {
36265 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
36266 = data0[lane];
36267 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
36268 = data1[lane];
36269 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
36270 = data2[lane];
36271 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 3]
36272 = data3[lane];
36273 }
36274 }
36275
36276 wf->outstandingReqs++;
36277 wf->validateRequestCounters();
36278 } // execute
36279
36280 void
36281 Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
36282 {
36283 initMemWrite<4>(gpuDynInst);
36284 } // initiateAcc
36285
36286 void
36287 Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
36288 {
36289 } // completeAcc
36290
36291 Inst_MUBUF__BUFFER_STORE_LDS_DWORD
36292 ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
36293 : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
36294 {
36295 setFlag(GlobalSegment);
36296 } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD
36297
36298 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
36299 {
36300 } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD
36301
36302 // Store one DWORD from LDS memory to system memory without utilizing
36303 // VGPRs.
36304 void
36305 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
36306 {
36307 panicUnimplemented();
36308 }
36309
36310 Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
36311 : Inst_MUBUF(iFmt, "buffer_wbinvl1")
36312 {
36313 setFlag(MemoryRef);
        setFlag(MemSync);
        setFlag(GlobalSegment);
36317 } // Inst_MUBUF__BUFFER_WBINVL1
36318
36319 Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
36320 {
36321 } // ~Inst_MUBUF__BUFFER_WBINVL1
36322
36323 // Write back and invalidate the shader L1.
36324 // Always returns ACK to shader.
36325 void
36326 Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
36327 {
36328 Wavefront *wf = gpuDynInst->wavefront();
36329 gpuDynInst->execUnitId = wf->execUnitId;
36330 gpuDynInst->exec_mask = wf->execMask();
36331 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36332 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36333
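        // The writeback/invalidate request is tracked as both a read and a
        // write in the global memory pipeline, so both sets of request
        // counters are adjusted here.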
36334 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
36335 gpuDynInst->computeUnit()->globalMemoryPipe.
36336 issueRequest(gpuDynInst);
36337 wf->wrGmReqsInPipe--;
36338 wf->rdGmReqsInPipe--;
36339
36340 wf->outstandingReqsWrGm++;
36341 wf->outstandingReqsRdGm++;
36342 } else {
            fatal("Non-global buffer instructions not implemented yet.\n");
36344 }
36345
36346 wf->outstandingReqs++;
36347 wf->validateRequestCounters();
36348 }
36349
36350 void
36351 Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
36352 {
36353 injectGlobalMemFence(gpuDynInst);
36354 } // initiateAcc
36355
36356 void
36357 Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
36358 {
36359 } // completeAcc
36360
    Inst_MUBUF__BUFFER_WBINVL1_VOL
        ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol")
    {
        /**
         * This instruction is the same as buffer_wbinvl1 except that it
         * only invalidates L1 shader lines whose MTYPE indicates system
         * or group coherence. Since the L1 does not differentiate between
         * its cache lines, this instruction currently behaves (and is
         * implemented) exactly like buffer_wbinvl1.
         */
        setFlag(MemoryRef);
        setFlag(MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1_VOL
36376
36377 Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
36378 {
36379 } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL
36380
36381 // Write back and invalidate the shader L1 only for lines that are marked
36382 // volatile. Always returns ACK to shader.
36383 void
36384 Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
36385 {
36386 Wavefront *wf = gpuDynInst->wavefront();
36387 gpuDynInst->execUnitId = wf->execUnitId;
36388 gpuDynInst->exec_mask = wf->execMask();
36389 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36390 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36391
36392 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
36393 gpuDynInst->computeUnit()->globalMemoryPipe.
36394 issueRequest(gpuDynInst);
36395 wf->wrGmReqsInPipe--;
36396 wf->rdGmReqsInPipe--;
36397
36398 wf->outstandingReqsWrGm++;
36399 wf->outstandingReqsRdGm++;
36400 } else {
            fatal("Non-global buffer instructions not implemented yet.\n");
36402 }
36403
36404 wf->outstandingReqs++;
36405 wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
36416
36417 Inst_MUBUF__BUFFER_ATOMIC_SWAP
36418 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
36419 : Inst_MUBUF(iFmt, "buffer_atomic_swap")
36420 {
36421 setFlag(AtomicExch);
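        // With GLC set the atomic returns the pre-operation value of the
        // memory location to the destination VGPR; otherwise nothing is
        // returned. The same GLC handling applies to all buffer atomics
        // below.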
36422 if (instData.GLC) {
36423 setFlag(AtomicReturn);
36424 } else {
36425 setFlag(AtomicNoReturn);
        }
36427 setFlag(MemoryRef);
36428 setFlag(GlobalSegment);
36429 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP
36430
36431 Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
36432 {
36433 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP
36434
36435 // tmp = MEM[ADDR];
36436 // MEM[ADDR] = DATA;
36437 // RETURN_DATA = tmp.
36438 void
36439 Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
36440 {
36441 panicUnimplemented();
36442 }
36443
36444 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
36445 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
36446 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
36447 {
36448 setFlag(AtomicCAS);
36449 if (instData.GLC) {
36450 setFlag(AtomicReturn);
36451 } else {
36452 setFlag(AtomicNoReturn);
36453 }
36454 setFlag(MemoryRef);
36455 setFlag(GlobalSegment);
36456 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
36457
36458 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
36459 {
36460 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
36461
36462 // tmp = MEM[ADDR];
36463 // src = DATA[0];
36464 // cmp = DATA[1];
36465 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
36466 // RETURN_DATA[0] = tmp.
36467 void
36468 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
36469 {
36470 panicUnimplemented();
36471 }
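
    // A minimal illustrative sketch (not simulator code; the helper name is
    // made up) of the semantics above. Note that DATA[0] supplies the new
    // value and DATA[1] the comparand, the reverse of the argument order
    // most CAS interfaces use:
    //
    //     VecElemU32 cmpSwap(VecElemU32 &mem, VecElemU32 src, VecElemU32 cmp)
    //     {
    //         VecElemU32 tmp = mem;
    //         mem = (tmp == cmp) ? src : tmp;
    //         return tmp;   // pre-operation value
    //     }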
36472
36473 Inst_MUBUF__BUFFER_ATOMIC_ADD
36474 ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
36475 : Inst_MUBUF(iFmt, "buffer_atomic_add")
36476 {
36477 setFlag(AtomicAdd);
36478 if (instData.GLC) {
36479 setFlag(AtomicReturn);
36480 } else {
36481 setFlag(AtomicNoReturn);
        }
36483 setFlag(MemoryRef);
36484 setFlag(GlobalSegment);
36485 } // Inst_MUBUF__BUFFER_ATOMIC_ADD
36486
36487 Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
36488 {
36489 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD
36490
36491 // tmp = MEM[ADDR];
36492 // MEM[ADDR] += DATA;
36493 // RETURN_DATA = tmp.
36494 void
36495 Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
36496 {
36497 panicUnimplemented();
36498 }
36499
36500 Inst_MUBUF__BUFFER_ATOMIC_SUB
36501 ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
36502 : Inst_MUBUF(iFmt, "buffer_atomic_sub")
36503 {
36504 setFlag(AtomicSub);
36505 if (instData.GLC) {
36506 setFlag(AtomicReturn);
36507 } else {
36508 setFlag(AtomicNoReturn);
36509 }
36510 setFlag(MemoryRef);
36511 setFlag(GlobalSegment);
36512 } // Inst_MUBUF__BUFFER_ATOMIC_SUB
36513
36514 Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
36515 {
36516 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB
36517
36518 // tmp = MEM[ADDR];
36519 // MEM[ADDR] -= DATA;
36520 // RETURN_DATA = tmp.
36521 void
36522 Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
36523 {
36524 panicUnimplemented();
36525 }
36526
36527 Inst_MUBUF__BUFFER_ATOMIC_SMIN
36528 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
36529 : Inst_MUBUF(iFmt, "buffer_atomic_smin")
36530 {
36531 setFlag(AtomicMin);
36532 if (instData.GLC) {
36533 setFlag(AtomicReturn);
36534 } else {
36535 setFlag(AtomicNoReturn);
36536 }
36537 setFlag(MemoryRef);
36538 setFlag(GlobalSegment);
36539 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN
36540
36541 Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
36542 {
36543 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN
36544
36545 // tmp = MEM[ADDR];
36546 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
36547 // RETURN_DATA = tmp.
36548 void
36549 Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
36550 {
36551 panicUnimplemented();
36552 }
36553
36554 Inst_MUBUF__BUFFER_ATOMIC_UMIN
36555 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
36556 : Inst_MUBUF(iFmt, "buffer_atomic_umin")
36557 {
36558 setFlag(AtomicMin);
36559 if (instData.GLC) {
36560 setFlag(AtomicReturn);
36561 } else {
36562 setFlag(AtomicNoReturn);
36563 }
36564 setFlag(MemoryRef);
36565 setFlag(GlobalSegment);
36566 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN
36567
36568 Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
36569 {
36570 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN
36571
36572 // tmp = MEM[ADDR];
36573 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
36574 // RETURN_DATA = tmp.
36575 void
36576 Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
36577 {
36578 panicUnimplemented();
36579 }
36580
36581 Inst_MUBUF__BUFFER_ATOMIC_SMAX
36582 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
36583 : Inst_MUBUF(iFmt, "buffer_atomic_smax")
36584 {
36585 setFlag(AtomicMax);
36586 if (instData.GLC) {
36587 setFlag(AtomicReturn);
36588 } else {
36589 setFlag(AtomicNoReturn);
36590 }
36591 setFlag(MemoryRef);
36592 setFlag(GlobalSegment);
36593 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX
36594
36595 Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
36596 {
36597 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX
36598
36599 // tmp = MEM[ADDR];
36600 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
36601 // RETURN_DATA = tmp.
36602 void
36603 Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
36604 {
36605 panicUnimplemented();
36606 }
36607
36608 Inst_MUBUF__BUFFER_ATOMIC_UMAX
36609 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
36610 : Inst_MUBUF(iFmt, "buffer_atomic_umax")
36611 {
36612 setFlag(AtomicMax);
36613 if (instData.GLC) {
36614 setFlag(AtomicReturn);
36615 } else {
36616 setFlag(AtomicNoReturn);
        }
36618 setFlag(MemoryRef);
36619 setFlag(GlobalSegment);
36620 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX
36621
36622 Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
36623 {
36624 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX
36625
36626 // tmp = MEM[ADDR];
36627 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
36628 // RETURN_DATA = tmp.
36629 void
36630 Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
36631 {
36632 panicUnimplemented();
36633 }
36634
36635 Inst_MUBUF__BUFFER_ATOMIC_AND
36636 ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
36637 : Inst_MUBUF(iFmt, "buffer_atomic_and")
36638 {
36639 setFlag(AtomicAnd);
36640 if (instData.GLC) {
36641 setFlag(AtomicReturn);
36642 } else {
36643 setFlag(AtomicNoReturn);
36644 }
36645 setFlag(MemoryRef);
36646 setFlag(GlobalSegment);
36647 } // Inst_MUBUF__BUFFER_ATOMIC_AND
36648
36649 Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
36650 {
36651 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND
36652
36653 // tmp = MEM[ADDR];
36654 // MEM[ADDR] &= DATA;
36655 // RETURN_DATA = tmp.
36656 void
36657 Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
36658 {
36659 panicUnimplemented();
36660 }
36661
36662 Inst_MUBUF__BUFFER_ATOMIC_OR
36663 ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
36664 : Inst_MUBUF(iFmt, "buffer_atomic_or")
36665 {
36666 setFlag(AtomicOr);
36667 if (instData.GLC) {
36668 setFlag(AtomicReturn);
36669 } else {
36670 setFlag(AtomicNoReturn);
36671 }
36672 setFlag(MemoryRef);
36673 setFlag(GlobalSegment);
36674 } // Inst_MUBUF__BUFFER_ATOMIC_OR
36675
36676 Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
36677 {
36678 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR
36679
36680 // tmp = MEM[ADDR];
36681 // MEM[ADDR] |= DATA;
36682 // RETURN_DATA = tmp.
36683 void
36684 Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
36685 {
36686 panicUnimplemented();
36687 }
36688
36689 Inst_MUBUF__BUFFER_ATOMIC_XOR
36690 ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
36691 : Inst_MUBUF(iFmt, "buffer_atomic_xor")
36692 {
36693 setFlag(AtomicXor);
36694 if (instData.GLC) {
36695 setFlag(AtomicReturn);
36696 } else {
36697 setFlag(AtomicNoReturn);
36698 }
36699 setFlag(MemoryRef);
36700 setFlag(GlobalSegment);
36701 } // Inst_MUBUF__BUFFER_ATOMIC_XOR
36702
36703 Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
36704 {
36705 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR
36706
36707 // tmp = MEM[ADDR];
36708 // MEM[ADDR] ^= DATA;
36709 // RETURN_DATA = tmp.
36710 void
36711 Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
36712 {
36713 panicUnimplemented();
36714 }
36715
36716 Inst_MUBUF__BUFFER_ATOMIC_INC
36717 ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
36718 : Inst_MUBUF(iFmt, "buffer_atomic_inc")
36719 {
36720 setFlag(AtomicInc);
36721 if (instData.GLC) {
36722 setFlag(AtomicReturn);
36723 } else {
36724 setFlag(AtomicNoReturn);
36725 }
36726 setFlag(MemoryRef);
36727 setFlag(GlobalSegment);
36728 } // Inst_MUBUF__BUFFER_ATOMIC_INC
36729
36730 Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
36731 {
36732 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC
36733
36734 // tmp = MEM[ADDR];
36735 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
36736 // RETURN_DATA = tmp.
36737 void
36738 Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
36739 {
36740 panicUnimplemented();
36741 }
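
    // A minimal illustrative sketch (not simulator code; the helper name is
    // made up): the increment wraps to zero once the current value reaches
    // DATA, rather than at the numeric maximum:
    //
    //     VecElemU32 wrapInc(VecElemU32 &mem, VecElemU32 data)
    //     {
    //         VecElemU32 tmp = mem;
    //         mem = (tmp >= data) ? 0 : tmp + 1;
    //         return tmp;   // pre-operation value
    //     }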
36742
36743 Inst_MUBUF__BUFFER_ATOMIC_DEC
36744 ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
36745 : Inst_MUBUF(iFmt, "buffer_atomic_dec")
36746 {
36747 setFlag(AtomicDec);
36748 if (instData.GLC) {
36749 setFlag(AtomicReturn);
36750 } else {
36751 setFlag(AtomicNoReturn);
36752 }
36753 setFlag(MemoryRef);
36754 setFlag(GlobalSegment);
36755 } // Inst_MUBUF__BUFFER_ATOMIC_DEC
36756
36757 Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
36758 {
36759 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC
36760
36761 // tmp = MEM[ADDR];
36762 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare);
    // RETURN_DATA = tmp.
36764 void
36765 Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
36766 {
36767 panicUnimplemented();
36768 }
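
    // A minimal illustrative sketch (not simulator code; the helper name is
    // made up): the decrement wraps back to DATA when the current value is
    // zero or already greater than DATA:
    //
    //     VecElemU32 wrapDec(VecElemU32 &mem, VecElemU32 data)
    //     {
    //         VecElemU32 tmp = mem;
    //         mem = (tmp == 0 || tmp > data) ? data : tmp - 1;
    //         return tmp;   // pre-operation value
    //     }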
36769
36770 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
36771 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
36772 : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
36773 {
36774 setFlag(AtomicExch);
36775 if (instData.GLC) {
36776 setFlag(AtomicReturn);
36777 } else {
36778 setFlag(AtomicNoReturn);
36779 }
36780 setFlag(MemoryRef);
36781 setFlag(GlobalSegment);
36782 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
36783
36784 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
36785 {
36786 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
36787
36788 // tmp = MEM[ADDR];
36789 // MEM[ADDR] = DATA[0:1];
36790 // RETURN_DATA[0:1] = tmp.
36791 void
36792 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
36793 {
36794 panicUnimplemented();
36795 }
36796
36797 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36798 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
36799 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
36800 {
36801 setFlag(AtomicCAS);
36802 if (instData.GLC) {
36803 setFlag(AtomicReturn);
36804 } else {
36805 setFlag(AtomicNoReturn);
36806 }
36807 setFlag(MemoryRef);
36808 setFlag(GlobalSegment);
36809 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36810
36811 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36812 ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
36813 {
36814 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36815
36816 // tmp = MEM[ADDR];
36817 // src = DATA[0:1];
36818 // cmp = DATA[2:3];
36819 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
36820 // RETURN_DATA[0:1] = tmp.
36821 void
36822 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
36823 {
36824 panicUnimplemented();
36825 }
36826
36827 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
36828 ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
36829 : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
36830 {
36831 setFlag(AtomicAdd);
36832 if (instData.GLC) {
36833 setFlag(AtomicReturn);
36834 } else {
36835 setFlag(AtomicNoReturn);
36836 }
36837 setFlag(MemoryRef);
36838 setFlag(GlobalSegment);
36839 } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
36840
36841 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
36842 {
36843 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
36844
36845 // tmp = MEM[ADDR];
36846 // MEM[ADDR] += DATA[0:1];
36847 // RETURN_DATA[0:1] = tmp.
36848 void
36849 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
36850 {
36851 panicUnimplemented();
36852 }
36853
36854 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
36855 ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
36856 : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
36857 {
36858 setFlag(AtomicSub);
36859 if (instData.GLC) {
36860 setFlag(AtomicReturn);
36861 } else {
36862 setFlag(AtomicNoReturn);
36863 }
36864 setFlag(MemoryRef);
36865 setFlag(GlobalSegment);
36866 } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
36867
36868 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
36869 {
36870 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
36871
36872 // tmp = MEM[ADDR];
36873 // MEM[ADDR] -= DATA[0:1];
36874 // RETURN_DATA[0:1] = tmp.
36875 void
36876 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
36877 {
36878 panicUnimplemented();
36879 }
36880
36881 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
36882 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
36883 : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
36884 {
36885 setFlag(AtomicMin);
36886 if (instData.GLC) {
36887 setFlag(AtomicReturn);
36888 } else {
36889 setFlag(AtomicNoReturn);
36890 }
36891 setFlag(MemoryRef);
36892 setFlag(GlobalSegment);
36893 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
36894
36895 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
36896 {
36897 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
36898
36899 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
36901 // RETURN_DATA[0:1] = tmp.
36902 void
36903 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
36904 {
36905 panicUnimplemented();
36906 }
36907
36908 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
36909 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
36910 : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
36911 {
36912 setFlag(AtomicMin);
36913 if (instData.GLC) {
36914 setFlag(AtomicReturn);
36915 } else {
36916 setFlag(AtomicNoReturn);
36917 }
36918 setFlag(MemoryRef);
36919 setFlag(GlobalSegment);
36920 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
36921
36922 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
36923 {
36924 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
36925
36926 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
36928 // RETURN_DATA[0:1] = tmp.
36929 void
36930 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
36931 {
36932 panicUnimplemented();
36933 }
36934
36935 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
36936 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
36937 : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
36938 {
36939 setFlag(AtomicMax);
36940 if (instData.GLC) {
36941 setFlag(AtomicReturn);
36942 } else {
36943 setFlag(AtomicNoReturn);
36944 }
36945 setFlag(MemoryRef);
36946 setFlag(GlobalSegment);
36947 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
36948
36949 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
36950 {
36951 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
36952
36953 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
36955 // RETURN_DATA[0:1] = tmp.
36956 void
36957 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
36958 {
36959 panicUnimplemented();
36960 }
36961
36962 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
36963 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
36964 : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
36965 {
36966 setFlag(AtomicMax);
36967 if (instData.GLC) {
36968 setFlag(AtomicReturn);
36969 } else {
36970 setFlag(AtomicNoReturn);
36971 }
36972 setFlag(MemoryRef);
36973 setFlag(GlobalSegment);
36974 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
36975
36976 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
36977 {
36978 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
36979
36980 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
36982 // RETURN_DATA[0:1] = tmp.
36983 void
36984 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
36985 {
36986 panicUnimplemented();
36987 }
36988
36989 Inst_MUBUF__BUFFER_ATOMIC_AND_X2
36990 ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
36991 : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
36992 {
36993 setFlag(AtomicAnd);
36994 if (instData.GLC) {
36995 setFlag(AtomicReturn);
36996 } else {
36997 setFlag(AtomicNoReturn);
36998 }
36999 setFlag(MemoryRef);
37000 setFlag(GlobalSegment);
37001 } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2
37002
37003 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
37004 {
37005 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2
37006
37007 // tmp = MEM[ADDR];
37008 // MEM[ADDR] &= DATA[0:1];
37009 // RETURN_DATA[0:1] = tmp.
37010 void
37011 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
37012 {
37013 panicUnimplemented();
37014 }
37015
37016 Inst_MUBUF__BUFFER_ATOMIC_OR_X2
37017 ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
37018 : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
37019 {
37020 setFlag(AtomicOr);
37021 if (instData.GLC) {
37022 setFlag(AtomicReturn);
37023 } else {
37024 setFlag(AtomicNoReturn);
37025 }
37026 setFlag(MemoryRef);
37027 setFlag(GlobalSegment);
37028 } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2
37029
37030 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
37031 {
37032 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2
37033
37034 // tmp = MEM[ADDR];
37035 // MEM[ADDR] |= DATA[0:1];
37036 // RETURN_DATA[0:1] = tmp.
37037 void
37038 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
37039 {
37040 panicUnimplemented();
37041 }
37042
37043 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
37044 ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
37045 : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
37046 {
37047 setFlag(AtomicXor);
37048 if (instData.GLC) {
37049 setFlag(AtomicReturn);
37050 } else {
37051 setFlag(AtomicNoReturn);
37052 }
37053 setFlag(MemoryRef);
37054 setFlag(GlobalSegment);
37055 } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
37056
37057 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
37058 {
37059 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
37060
37061 // tmp = MEM[ADDR];
37062 // MEM[ADDR] ^= DATA[0:1];
37063 // RETURN_DATA[0:1] = tmp.
37064 void
37065 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
37066 {
37067 panicUnimplemented();
37068 }
37069
37070 Inst_MUBUF__BUFFER_ATOMIC_INC_X2
37071 ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
37072 : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
37073 {
37074 setFlag(AtomicInc);
37075 if (instData.GLC) {
37076 setFlag(AtomicReturn);
37077 } else {
37078 setFlag(AtomicNoReturn);
37079 }
37080 setFlag(MemoryRef);
37081 setFlag(GlobalSegment);
37082 } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2
37083
37084 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
37085 {
37086 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2
37087
37088 // tmp = MEM[ADDR];
37089 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
37090 // RETURN_DATA[0:1] = tmp.
37091 void
37092 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
37093 {
37094 panicUnimplemented();
37095 }
37096
37097 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
37098 ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
37099 : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
37100 {
37101 setFlag(AtomicDec);
37102 if (instData.GLC) {
37103 setFlag(AtomicReturn);
37104 } else {
37105 setFlag(AtomicNoReturn);
37106 }
37107 setFlag(MemoryRef);
37108 setFlag(GlobalSegment);
37109 } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
37110
37111 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
37112 {
37113 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
37114
37115 // tmp = MEM[ADDR];
37116 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
37117 // (unsigned compare);
37118 // RETURN_DATA[0:1] = tmp.
37119 void
37120 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
37121 {
37122 panicUnimplemented();
37123 }
37124
37125 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
37126 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt)
37127 : Inst_MTBUF(iFmt, "tbuffer_load_format_x")
37128 {
37129 setFlag(MemoryRef);
37130 setFlag(Load);
37131 setFlag(GlobalSegment);
37132 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
37133
37134 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X()
37135 {
37136 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
37137
37138 // Typed buffer load 1 dword with format conversion.
37139 void
37140 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
37141 {
37142 panicUnimplemented();
37143 }
37144
37145 void
37146 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
37147 {
37148 } // initiateAcc
37149
37150 void
37151 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
37152 {
37153 }
37154
37155 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
37156 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt)
37157 : Inst_MTBUF(iFmt, "tbuffer_load_format_xy")
37158 {
37159 setFlag(MemoryRef);
37160 setFlag(Load);
37161 setFlag(GlobalSegment);
37162 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
37163
37164 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY()
37165 {
37166 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
37167
37168 // Typed buffer load 2 dwords with format conversion.
37169 void
37170 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
37171 {
37172 panicUnimplemented();
37173 }
37174
37175 void
37176 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
37177 {
37178 } // initiateAcc
37179
37180 void
37181 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
37182 {
37183 }
37184
37185 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
37186 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt)
37187 : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz")
37188 {
37189 setFlag(MemoryRef);
37190 setFlag(Load);
37191 setFlag(GlobalSegment);
37192 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
37193
37194 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ()
37195 {
37196 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
37197
37198 // Typed buffer load 3 dwords with format conversion.
37199 void
37200 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
37201 {
37202 panicUnimplemented();
37203 }
37204
37205 void
37206 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
37207 {
37208 } // initiateAcc
37209
37210 void
37211 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
37212 {
37213 }
37214
37215 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37216 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt)
37217 : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw")
37218 {
37219 setFlag(MemoryRef);
37220 setFlag(Load);
37221 setFlag(GlobalSegment);
37222 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37223
37224 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37225 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW()
37226 {
37227 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37228
37229 // Typed buffer load 4 dwords with format conversion.
37230 void
37231 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
37232 {
37233 panicUnimplemented();
37234 }
37235
37236 void
37237 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
37238 {
37239 } // initiateAcc
37240
37241 void
37242 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
37243 {
37244 }
37245
37246 Inst_MTBUF__TBUFFER_STORE_FORMAT_X
37247 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt)
37248 : Inst_MTBUF(iFmt, "tbuffer_store_format_x")
37249 {
37250 setFlag(MemoryRef);
37251 setFlag(Store);
37252 setFlag(GlobalSegment);
37253 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X
37254
37255 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X()
37256 {
37257 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X
37258
37259 // Typed buffer store 1 dword with format conversion.
37260 void
37261 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
37262 {
37263 panicUnimplemented();
37264 }
37265
37266 void
37267 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
37268 {
37269 } // initiateAcc
37270
37271 void
37272 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
37273 {
37274 }
37275
37276 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
37277 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt)
37278 : Inst_MTBUF(iFmt, "tbuffer_store_format_xy")
37279 {
37280 setFlag(MemoryRef);
37281 setFlag(Store);
37282 setFlag(GlobalSegment);
37283 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
37284
37285 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY()
37286 {
37287 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
37288
37289 // Typed buffer store 2 dwords with format conversion.
37290 void
37291 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
37292 {
37293 panicUnimplemented();
37294 }
37295
37296 void
37297 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
37298 {
37299 } // initiateAcc
37300
37301 void
37302 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
37303 {
37304 }
37305
37306 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37307 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt)
37308 : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz")
37309 {
37310 setFlag(MemoryRef);
37311 setFlag(Store);
37312 setFlag(GlobalSegment);
37313 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37314
37315 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37316 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ()
37317 {
37318 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37319
37320 // Typed buffer store 3 dwords with format conversion.
37321 void
37322 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
37323 {
37324 panicUnimplemented();
37325 }
37326
37327 void
37328 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
37329 {
37330 } // initiateAcc
37331
37332 void
37333 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
37334 {
37335 }
37336
37337 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37338 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt)
37339 : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw")
37340 {
37341 setFlag(MemoryRef);
37342 setFlag(Store);
37343 setFlag(GlobalSegment);
37344 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37345
37346 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37347 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW()
37348 {
37349 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37350
37351 // Typed buffer store 4 dwords with format conversion.
37352 void
37353 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
37354 {
37355 panicUnimplemented();
37356 }
37357
37358 void
37359 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc(
37360 GPUDynInstPtr gpuDynInst)
37361 {
37362 } // initiateAcc
37363
37364 void
37365 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc(
37366 GPUDynInstPtr gpuDynInst)
37367 {
37368 }
37369
37370 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
37371 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt)
37372 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x")
37373 {
37374 setFlag(MemoryRef);
37375 setFlag(Load);
37376 setFlag(GlobalSegment);
37377 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
37378
37379 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::
37380 ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X()
37381 {
37382 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
37383
37384 // Typed buffer load 1 dword with format conversion.
37385 void
37386 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
37387 {
37388 panicUnimplemented();
37389 }
37390
37391 void
37392 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc(
37393 GPUDynInstPtr gpuDynInst)
37394 {
37395 } // initiateAcc
37396
37397 void
37398 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc(
37399 GPUDynInstPtr gpuDynInst)
37400 {
37401 }
37402
37403 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37404 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
37405 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy")
37406 {
37407 setFlag(MemoryRef);
37408 setFlag(Load);
37409 setFlag(GlobalSegment);
37410 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37411
37412 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37413 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY()
37414 {
37415 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37416
37417 // Typed buffer load 2 dwords with format conversion.
37418 void
37419 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
37420 {
37421 panicUnimplemented();
37422 }
37423
37424 void
37425 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
37426 GPUDynInstPtr gpuDynInst)
37427 {
37428 } // initiateAcc
37429
37430 void
37431 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc(
37432 GPUDynInstPtr gpuDynInst)
37433 {
37434 }
37435
37436 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37437 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(
37438 InFmt_MTBUF *iFmt)
37439 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz")
37440 {
37441 setFlag(MemoryRef);
37442 setFlag(Load);
37443 setFlag(GlobalSegment);
37444 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37445
37446 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37447 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ()
37448 {
37449 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37450
37451 // Typed buffer load 3 dwords with format conversion.
37452 void
37453 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
37454 {
37455 panicUnimplemented();
37456 }
37457
37458 void
37459 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
37460 GPUDynInstPtr gpuDynInst)
37461 {
37462 } // initiateAcc
37463
37464 void
37465 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
37466 GPUDynInstPtr gpuDynInst)
37467 {
37468 }
37469
37470 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37471 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(
37472 InFmt_MTBUF *iFmt)
37473 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw")
37474 {
37475 setFlag(MemoryRef);
37476 setFlag(Load);
37477 setFlag(GlobalSegment);
37478 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37479
37480 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37481 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW()
37482 {
37483 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37484
37485 // Typed buffer load 4 dwords with format conversion.
37486 void
37487 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
37488 {
37489 panicUnimplemented();
37490 }
37491
37492 void
37493 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
37494 GPUDynInstPtr gpuDynInst)
37495 {
37496 } // initiateAcc
37497
37498 void
37499 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
37500 GPUDynInstPtr gpuDynInst)
37501 {
37502 }
37503
37504 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37505 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt)
37506 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x")
37507 {
37508 setFlag(MemoryRef);
37509 setFlag(Store);
37510 setFlag(GlobalSegment);
37511 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37512
37513 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37514 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X()
37515 {
37516 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37517
37518 // Typed buffer store 1 dword with format conversion.
37519 void
37520 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
37521 {
37522 panicUnimplemented();
37523 }
37524
37525 void
37526 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc(
37527 GPUDynInstPtr gpuDynInst)
37528 {
37529 } // initiateAcc
37530
37531 void
37532 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc(
37533 GPUDynInstPtr gpuDynInst)
37534 {
37535 }
37536
37537 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37538 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
37539 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy")
37540 {
37541 setFlag(MemoryRef);
37542 setFlag(Store);
37543 setFlag(GlobalSegment);
37544 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37545
37546 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37547 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY()
37548 {
37549 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37550
37551 // Typed buffer store 2 dwords with format conversion.
37552 void
37553 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
37554 {
37555 panicUnimplemented();
37556 }
37557
37558 void
37559 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
37560 GPUDynInstPtr gpuDynInst)
37561 {
37562 } // initiateAcc
37563
37564 void
37565 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
37566 GPUDynInstPtr gpuDynInst)
37567 {
37568 }
37569
37570 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37571 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
37572 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
37573 {
37574 setFlag(MemoryRef);
37575 setFlag(Store);
37576 setFlag(GlobalSegment);
37577 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37578
37579 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37580 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
37581 {
37582 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37583
37584 // Typed buffer store 3 dwords with format conversion.
37585 void
37586 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
37587 {
37588 panicUnimplemented();
37589 }
37590
37591 void
37592 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
37593 GPUDynInstPtr gpuDynInst)
37594 {
37595 } // initiateAcc
37596
37597 void
37598 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
37599 GPUDynInstPtr gpuDynInst)
37600 {
37601 }
37602
37603 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37604 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
37605 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
37606 {
37607 setFlag(MemoryRef);
37608 setFlag(Store);
37609 setFlag(GlobalSegment);
37610 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37611
37612 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37613 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
37614 {
37615 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37616
37617 // Typed buffer store 4 dwords with format conversion.
37618 void
37619 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
37620 GPUDynInstPtr gpuDynInst)
37621 {
37622 panicUnimplemented();
37623 }
37624
37625 void
37626 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
37627 GPUDynInstPtr gpuDynInst)
37628 {
37629 } // initiateAcc
37630
37631 void
37632 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
37633 GPUDynInstPtr gpuDynInst)
37634 {
37635 }
37636
37637 Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
37638 : Inst_MIMG(iFmt, "image_load")
37639 {
37640 setFlag(MemoryRef);
37641 setFlag(Load);
37642 setFlag(GlobalSegment);
37643 } // Inst_MIMG__IMAGE_LOAD
37644
37645 Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
37646 {
37647 } // ~Inst_MIMG__IMAGE_LOAD
37648
37649 // Image memory load with format conversion specified
37650 void
37651 Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
37652 {
37653 panicUnimplemented();
37654 }
37655
37656 void
37657 Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
37658 {
37659 } // initiateAcc
37660
37661 void
37662 Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
37663 {
37664 }
37665
37666 Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
37667 : Inst_MIMG(iFmt, "image_load_mip")
37668 {
37669 setFlag(MemoryRef);
37670 setFlag(Load);
37671 setFlag(GlobalSegment);
37672 } // Inst_MIMG__IMAGE_LOAD_MIP
37673
37674 Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
37675 {
37676 } // ~Inst_MIMG__IMAGE_LOAD_MIP
37677
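    // Image memory load with user-supplied mip level.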
37678 void
37679 Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
37680 {
37681 panicUnimplemented();
37682 }
37683
37684 void
37685 Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
37686 {
37687 } // initiateAcc
37688
37689 void
37690 Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
37691 {
37692 }
37693
37694 Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
37695 : Inst_MIMG(iFmt, "image_load_pck")
37696 {
37697 setFlag(MemoryRef);
37698 setFlag(Load);
37699 setFlag(GlobalSegment);
37700 } // Inst_MIMG__IMAGE_LOAD_PCK
37701
37702 Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
37703 {
37704 } // ~Inst_MIMG__IMAGE_LOAD_PCK
37705
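    // Image memory load with no format conversion.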
37706 void
37707 Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
37708 {
37709 panicUnimplemented();
37710 }
37711
37712 void
37713 Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37714 {
37715 } // initiateAcc
37716
37717 void
37718 Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37719 {
37720 }
37721
37722 Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
37723 InFmt_MIMG *iFmt)
37724 : Inst_MIMG(iFmt, "image_load_pck_sgn")
37725 {
37726 setFlag(MemoryRef);
37727 setFlag(Load);
37728 setFlag(GlobalSegment);
37729 } // Inst_MIMG__IMAGE_LOAD_PCK_SGN
37730
37731 Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
37732 {
37733 } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN
37734
    // Image memory load with no format conversion and with sign extension.
37736 void
37737 Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
37738 {
37739 panicUnimplemented();
37740 }
37741
37742 void
37743 Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
37744 {
37745 } // initiateAcc
37746
37747 void
37748 Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
37749 {
37750 }
37751
37752 Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
37753 InFmt_MIMG *iFmt)
37754 : Inst_MIMG(iFmt, "image_load_mip_pck")
37755 {
37756 setFlag(MemoryRef);
37757 setFlag(Load);
37758 setFlag(GlobalSegment);
37759 } // Inst_MIMG__IMAGE_LOAD_MIP_PCK
37760
37761 Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
37762 {
37763 } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK
37764
37765 // Image memory load with user-supplied mip level, no format conversion
37766 void
37767 Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
37768 {
37769 panicUnimplemented();
37770 }
37771
37772 void
37773 Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37774 {
37775 } // initiateAcc
37776
37777 void
37778 Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37779 {
37780 }
37781
37782 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
37783 InFmt_MIMG *iFmt)
37784 : Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
37785 {
37786 setFlag(MemoryRef);
37787 setFlag(Load);
37788 setFlag(GlobalSegment);
37789 } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN
37790
37791 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
37792 {
37793 } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN
37794
37795         // Image memory load with user-supplied mip level, no format
              // conversion and with sign extension.
37796 void
37797 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
37798 {
37799 panicUnimplemented();
37800 }
37801
37802 void
37803 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
37804 {
37805 } // initiateAcc
37806
37807 void
37808 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
37809 {
37810 }
37811
37812 Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
37813 : Inst_MIMG(iFmt, "image_store")
37814 {
37815 setFlag(MemoryRef);
37816 setFlag(Store);
37817 setFlag(GlobalSegment);
37818 } // Inst_MIMG__IMAGE_STORE
37819
37820 Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
37821 {
37822 } // ~Inst_MIMG__IMAGE_STORE
37823
37824 // Image memory store with format conversion specified
37825 void
37826 Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
37827 {
37828 panicUnimplemented();
37829 }
37830
37831 void
37832 Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
37833 {
37834 } // initiateAcc
37835
37836 void
37837 Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
37838 {
37839 }
37840
37841 Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
37842 : Inst_MIMG(iFmt, "image_store_mip")
37843 {
37844 setFlag(MemoryRef);
37845 setFlag(Store);
37846 setFlag(GlobalSegment);
37847 } // Inst_MIMG__IMAGE_STORE_MIP
37848
37849 Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
37850 {
37851 } // ~Inst_MIMG__IMAGE_STORE_MIP
37852
37853 void
37854 Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
37855 {
37856 panicUnimplemented();
37857 }
37858
37859 void
37860 Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
37861 {
37862 } // initiateAcc
37863
37864 void
37865 Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
37866 {
37867 }
37868
37869 Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
37870 : Inst_MIMG(iFmt, "image_store_pck")
37871 {
37872 setFlag(MemoryRef);
37873 setFlag(Store);
37874 setFlag(GlobalSegment);
37875 } // Inst_MIMG__IMAGE_STORE_PCK
37876
37877 Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
37878 {
37879 } // ~Inst_MIMG__IMAGE_STORE_PCK
37880
37881 // Image memory store of packed data without format conversion.
37882 void
37883 Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
37884 {
37885 panicUnimplemented();
37886 }
37887
37888 void
37889 Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37890 {
37891 } // initiateAcc
37892
37893 void
37894 Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37895 {
37896 }
37897
37898 Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
37899 InFmt_MIMG *iFmt)
37900 : Inst_MIMG(iFmt, "image_store_mip_pck")
37901 {
37902 setFlag(MemoryRef);
37903 setFlag(Store);
37904 setFlag(GlobalSegment);
37905 } // Inst_MIMG__IMAGE_STORE_MIP_PCK
37906
37907 Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
37908 {
37909 } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK
37910
37911 // Image memory store of packed data without format conversion
37912 void
37913 Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
37914 {
37915 panicUnimplemented();
37916 }
37917
37918 void
37919 Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37920 {
37921 } // initiateAcc
37922
37923 void
37924 Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37925 {
37926 }
37927
37928 Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
37929 InFmt_MIMG *iFmt)
37930 : Inst_MIMG(iFmt, "image_get_resinfo")
37931 {
37932 setFlag(GlobalSegment);
37933 } // Inst_MIMG__IMAGE_GET_RESINFO
37934
37935 Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
37936 {
37937 } // ~Inst_MIMG__IMAGE_GET_RESINFO
37938
37939 void
37940 Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
37941 {
37942 panicUnimplemented();
37943 }
37944
37945 Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
37946 InFmt_MIMG *iFmt)
37947 : Inst_MIMG(iFmt, "image_atomic_swap")
37948 {
37949 setFlag(AtomicExch);
37950 if (instData.GLC) {
37951 setFlag(AtomicReturn);
37952 } else {
37953 setFlag(AtomicNoReturn);
37954 }
37955 setFlag(MemoryRef);
37956 setFlag(GlobalSegment);
37957 } // Inst_MIMG__IMAGE_ATOMIC_SWAP
37958
37959 Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
37960 {
37961 } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP
37962
37963 // tmp = MEM[ADDR];
37964 // MEM[ADDR] = DATA;
37965 // RETURN_DATA = tmp.
37966 void
37967 Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
37968 {
37969 panicUnimplemented();
37970 }
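
              // Every MIMG atomic below keys its return behavior off the
              // GLC bit in the same way: with GLC=1 the pre-operation
              // memory value is returned to the destination VGPRs (hence
              // AtomicReturn), and with GLC=0 it is discarded
              // (AtomicNoReturn).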
37971
37972 Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
37973 InFmt_MIMG *iFmt)
37974 : Inst_MIMG(iFmt, "image_atomic_cmpswap")
37975 {
37976 setFlag(AtomicCAS);
37977 if (instData.GLC) {
37978 setFlag(AtomicReturn);
37979 } else {
37980 setFlag(AtomicNoReturn);
37981 }
37982 setFlag(MemoryRef);
37983 setFlag(GlobalSegment);
37984 } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP
37985
37986 Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
37987 {
37988 } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP
37989
37990 // tmp = MEM[ADDR];
37991 // src = DATA[0];
37992 // cmp = DATA[1];
37993 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
37994 // RETURN_DATA[0] = tmp.
37995 void
37996 Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
37997 {
37998 panicUnimplemented();
37999 }
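
              // DATA for image_atomic_cmpswap is a two-register pair:
              // DATA[0] holds the new value and DATA[1] the compare
              // value, so the per-texel update is a strong
              // compare-exchange. A minimal host-side sketch of the same
              // rule (hypothetical helper using <atomic>, not the gem5
              // timing path):
              //
              //     uint32_t cmpswap(std::atomic<uint32_t> &mem,
              //                      uint32_t src, uint32_t cmp)
              //     {
              //         uint32_t tmp = cmp;
              //         mem.compare_exchange_strong(tmp, src);
              //         return tmp; // pre-op value, i.e. RETURN_DATA[0]
              //     }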
38000
38001 Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt)
38002 : Inst_MIMG(iFmt, "image_atomic_add")
38003 {
38004 setFlag(AtomicAdd);
38005 if (instData.GLC) {
38006 setFlag(AtomicReturn);
38007 } else {
38008 setFlag(AtomicNoReturn);
38009 }
38010 setFlag(MemoryRef);
38011 setFlag(GlobalSegment);
38012 } // Inst_MIMG__IMAGE_ATOMIC_ADD
38013
38014 Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
38015 {
38016 } // ~Inst_MIMG__IMAGE_ATOMIC_ADD
38017
38018 // tmp = MEM[ADDR];
38019 // MEM[ADDR] += DATA;
38020 // RETURN_DATA = tmp.
38021 void
38022 Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
38023 {
38024 panicUnimplemented();
38025 }
38026
38027 Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt)
38028 : Inst_MIMG(iFmt, "image_atomic_sub")
38029 {
38030 setFlag(AtomicSub);
38031 if (instData.GLC) {
38032 setFlag(AtomicReturn);
38033 } else {
38034 setFlag(AtomicNoReturn);
38035 }
38036 setFlag(MemoryRef);
38037 setFlag(GlobalSegment);
38038 } // Inst_MIMG__IMAGE_ATOMIC_SUB
38039
38040 Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
38041 {
38042 } // ~Inst_MIMG__IMAGE_ATOMIC_SUB
38043
38044 // tmp = MEM[ADDR];
38045 // MEM[ADDR] -= DATA;
38046 // RETURN_DATA = tmp.
38047 void
38048 Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
38049 {
38050 panicUnimplemented();
38051 }
38052
38053 Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
38054 InFmt_MIMG *iFmt)
38055 : Inst_MIMG(iFmt, "image_atomic_smin")
38056 {
38057 setFlag(AtomicMin);
38058 if (instData.GLC) {
38059 setFlag(AtomicReturn);
38060 } else {
38061 setFlag(AtomicNoReturn);
38062 }
38063 setFlag(MemoryRef);
38064 setFlag(GlobalSegment);
38065 } // Inst_MIMG__IMAGE_ATOMIC_SMIN
38066
38067 Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
38068 {
38069 } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN
38070
38071 // tmp = MEM[ADDR];
38072 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
38073 // RETURN_DATA = tmp.
38074 void
38075 Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
38076 {
38077 panicUnimplemented();
38078 }
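
              // The s/u prefix on the image min/max atomics changes only
              // the comparison type, not the data movement: smin/smax
              // compare the texel and DATA as int32, umin/umax as
              // uint32. Illustrative (assumes <algorithm>):
              //
              //     int32_t  sres = std::min(int32_t(tmp), int32_t(data));
              //     uint32_t ures = std::min(tmp, data);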
38079
38080 Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
38081 InFmt_MIMG *iFmt)
38082 : Inst_MIMG(iFmt, "image_atomic_umin")
38083 {
38084 setFlag(AtomicMin);
38085 if (instData.GLC) {
38086 setFlag(AtomicReturn);
38087 } else {
38088 setFlag(AtomicNoReturn);
38089 }
38090 setFlag(MemoryRef);
38091 setFlag(GlobalSegment);
38092 } // Inst_MIMG__IMAGE_ATOMIC_UMIN
38093
38094 Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
38095 {
38096 } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN
38097
38098 // tmp = MEM[ADDR];
38099 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
38100 // RETURN_DATA = tmp.
38101 void
38102 Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
38103 {
38104 panicUnimplemented();
38105 }
38106
38107 Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
38108 InFmt_MIMG *iFmt)
38109 : Inst_MIMG(iFmt, "image_atomic_smax")
38110 {
38111 setFlag(AtomicMax);
38112 if (instData.GLC) {
38113 setFlag(AtomicReturn);
38114 } else {
38115 setFlag(AtomicNoReturn);
38116 }
38117 setFlag(MemoryRef);
38118 setFlag(GlobalSegment);
38119 } // Inst_MIMG__IMAGE_ATOMIC_SMAX
38120
38121 Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
38122 {
38123 } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX
38124
38125 // tmp = MEM[ADDR];
38126 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
38127 // RETURN_DATA = tmp.
38128 void
38129 Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
38130 {
38131 panicUnimplemented();
38132 }
38133
38134 Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
38135 InFmt_MIMG *iFmt)
38136 : Inst_MIMG(iFmt, "image_atomic_umax")
38137 {
38138 setFlag(AtomicMax);
38139 if (instData.GLC) {
38140 setFlag(AtomicReturn);
38141 } else {
38142 setFlag(AtomicNoReturn);
38143 }
38144 setFlag(MemoryRef);
38145 setFlag(GlobalSegment);
38146 } // Inst_MIMG__IMAGE_ATOMIC_UMAX
38147
38148 Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
38149 {
38150 } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX
38151
38152 // tmp = MEM[ADDR];
38153 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
38154 // RETURN_DATA = tmp.
38155 void
38156 Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
38157 {
38158 panicUnimplemented();
38159 }
38160
38161 Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
38162 : Inst_MIMG(iFmt, "image_atomic_and")
38163 {
38164 setFlag(AtomicAnd);
38165 if (instData.GLC) {
38166 setFlag(AtomicReturn);
38167 } else {
38168 setFlag(AtomicNoReturn);
38169 }
38170 setFlag(MemoryRef);
38171 setFlag(GlobalSegment);
38172 } // Inst_MIMG__IMAGE_ATOMIC_AND
38173
38174 Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
38175 {
38176 } // ~Inst_MIMG__IMAGE_ATOMIC_AND
38177
38178 // tmp = MEM[ADDR];
38179 // MEM[ADDR] &= DATA;
38180 // RETURN_DATA = tmp.
38181 void
38182 Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
38183 {
38184 panicUnimplemented();
38185 }
38186
38187 Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
38188 : Inst_MIMG(iFmt, "image_atomic_or")
38189 {
38190 setFlag(AtomicOr);
38191 if (instData.GLC) {
38192 setFlag(AtomicReturn);
38193 } else {
38194 setFlag(AtomicNoReturn);
38195 }
38196 setFlag(MemoryRef);
38197 setFlag(GlobalSegment);
38198 } // Inst_MIMG__IMAGE_ATOMIC_OR
38199
38200 Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
38201 {
38202 } // ~Inst_MIMG__IMAGE_ATOMIC_OR
38203
38204 // tmp = MEM[ADDR];
38205 // MEM[ADDR] |= DATA;
38206 // RETURN_DATA = tmp.
38207 void
38208 Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
38209 {
38210 panicUnimplemented();
38211 }
38212
38213 Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
38214 : Inst_MIMG(iFmt, "image_atomic_xor")
38215 {
38216 setFlag(AtomicXor);
38217 if (instData.GLC) {
38218 setFlag(AtomicReturn);
38219 } else {
38220 setFlag(AtomicNoReturn);
38221 }
38222 setFlag(MemoryRef);
38223 setFlag(GlobalSegment);
38224 } // Inst_MIMG__IMAGE_ATOMIC_XOR
38225
38226 Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
38227 {
38228 } // ~Inst_MIMG__IMAGE_ATOMIC_XOR
38229
38230 // tmp = MEM[ADDR];
38231 // MEM[ADDR] ^= DATA;
38232 // RETURN_DATA = tmp.
38233 void
38234 Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
38235 {
38236 panicUnimplemented();
38237 }
38238
38239 Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
38240 : Inst_MIMG(iFmt, "image_atomic_inc")
38241 {
38242 setFlag(AtomicInc);
38243 if (instData.GLC) {
38244 setFlag(AtomicReturn);
38245 } else {
38246 setFlag(AtomicNoReturn);
38247 }
38248 setFlag(MemoryRef);
38249 setFlag(GlobalSegment);
38250 } // Inst_MIMG__IMAGE_ATOMIC_INC
38251
38252 Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
38253 {
38254 } // ~Inst_MIMG__IMAGE_ATOMIC_INC
38255
38256 // tmp = MEM[ADDR];
38257 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
38258 // RETURN_DATA = tmp.
38259 void
38260 Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
38261 {
38262 panicUnimplemented();
38263 }
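
              // image_atomic_inc is a wrapping increment: DATA acts as
              // the wrap limit, not an addend. A scalar sketch of the
              // update rule above (illustrative only):
              //
              //     uint32_t wrappingInc(uint32_t tmp, uint32_t data)
              //     {
              //         return (tmp >= data) ? 0 : tmp + 1;
              //     }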
38264
38265 Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
38266 : Inst_MIMG(iFmt, "image_atomic_dec")
38267 {
38268 setFlag(AtomicDec);
38269 if (instData.GLC) {
38270 setFlag(AtomicReturn);
38271 } else {
38272 setFlag(AtomicNoReturn);
38273 }
38274 setFlag(MemoryRef);
38275 setFlag(GlobalSegment);
38276 } // Inst_MIMG__IMAGE_ATOMIC_DEC
38277
38278 Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
38279 {
38280 } // ~Inst_MIMG__IMAGE_ATOMIC_DEC
38281
38282 // tmp = MEM[ADDR];
38283 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
38284 // (unsigned compare); RETURN_DATA = tmp.
38285 void
38286 Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
38287 {
38288 panicUnimplemented();
38289 }
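
              // image_atomic_dec wraps the other way: it resets to DATA
              // when the texel is 0 or already above DATA. A scalar
              // sketch (illustrative only):
              //
              //     uint32_t wrappingDec(uint32_t tmp, uint32_t data)
              //     {
              //         return (tmp == 0 || tmp > data) ? data : tmp - 1;
              //     }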
38290
38291 Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
38292 : Inst_MIMG(iFmt, "image_sample")
38293 {
38294 setFlag(GlobalSegment);
38295 } // Inst_MIMG__IMAGE_SAMPLE
38296
38297 Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
38298 {
38299 } // ~Inst_MIMG__IMAGE_SAMPLE
38300
38301 void
38302 Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
38303 {
38304 panicUnimplemented();
38305 }
38306
38307 Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
38308 : Inst_MIMG(iFmt, "image_sample_cl")
38309 {
38310 setFlag(GlobalSegment);
38311 } // Inst_MIMG__IMAGE_SAMPLE_CL
38312
38313 Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
38314 {
38315 } // ~Inst_MIMG__IMAGE_SAMPLE_CL
38316
38317 void
38318 Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
38319 {
38320 panicUnimplemented();
38321 }
38322
38323 Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
38324 : Inst_MIMG(iFmt, "image_sample_d")
38325 {
38326 setFlag(GlobalSegment);
38327 } // Inst_MIMG__IMAGE_SAMPLE_D
38328
38329 Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
38330 {
38331 } // ~Inst_MIMG__IMAGE_SAMPLE_D
38332
38333 void
38334 Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
38335 {
38336 panicUnimplemented();
38337 }
38338
38339 Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
38340 InFmt_MIMG *iFmt)
38341 : Inst_MIMG(iFmt, "image_sample_d_cl")
38342 {
38343 setFlag(GlobalSegment);
38344 } // Inst_MIMG__IMAGE_SAMPLE_D_CL
38345
38346 Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
38347 {
38348 } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL
38349
38350 void
38351 Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
38352 {
38353 panicUnimplemented();
38354 }
38355
38356 Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
38357 : Inst_MIMG(iFmt, "image_sample_l")
38358 {
38359 setFlag(GlobalSegment);
38360 } // Inst_MIMG__IMAGE_SAMPLE_L
38361
38362 Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
38363 {
38364 } // ~Inst_MIMG__IMAGE_SAMPLE_L
38365
38366 void
38367 Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
38368 {
38369 panicUnimplemented();
38370 }
38371
38372 Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
38373 : Inst_MIMG(iFmt, "image_sample_b")
38374 {
38375 setFlag(GlobalSegment);
38376 } // Inst_MIMG__IMAGE_SAMPLE_B
38377
38378 Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
38379 {
38380 } // ~Inst_MIMG__IMAGE_SAMPLE_B
38381
38382 void
38383 Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
38384 {
38385 panicUnimplemented();
38386 }
38387
38388 Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
38389 InFmt_MIMG *iFmt)
38390 : Inst_MIMG(iFmt, "image_sample_b_cl")
38391 {
38392 setFlag(GlobalSegment);
38393 } // Inst_MIMG__IMAGE_SAMPLE_B_CL
38394
38395 Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
38396 {
38397 } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL
38398
38399 void
38400 Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
38401 {
38402 panicUnimplemented();
38403 }
38404
38405 Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
38406 : Inst_MIMG(iFmt, "image_sample_lz")
38407 {
38408 setFlag(GlobalSegment);
38409 } // Inst_MIMG__IMAGE_SAMPLE_LZ
38410
38411 Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
38412 {
38413 } // ~Inst_MIMG__IMAGE_SAMPLE_LZ
38414
38415 void
38416 Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
38417 {
38418 panicUnimplemented();
38419 }
38420
38421 Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
38422 : Inst_MIMG(iFmt, "image_sample_c")
38423 {
38424 setFlag(GlobalSegment);
38425 } // Inst_MIMG__IMAGE_SAMPLE_C
38426
38427 Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
38428 {
38429 } // ~Inst_MIMG__IMAGE_SAMPLE_C
38430
38431 void
38432 Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
38433 {
38434 panicUnimplemented();
38435 }
38436
38437 Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
38438 InFmt_MIMG *iFmt)
38439 : Inst_MIMG(iFmt, "image_sample_c_cl")
38440 {
38441 setFlag(GlobalSegment);
38442 } // Inst_MIMG__IMAGE_SAMPLE_C_CL
38443
38444 Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
38445 {
38446 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL
38447
38448 void
38449 Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
38450 {
38451 panicUnimplemented();
38452 }
38453
38454 Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
38455 : Inst_MIMG(iFmt, "image_sample_c_d")
38456 {
38457 setFlag(GlobalSegment);
38458 } // Inst_MIMG__IMAGE_SAMPLE_C_D
38459
38460 Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
38461 {
38462 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D
38463
38464 void
38465 Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
38466 {
38467 panicUnimplemented();
38468 }
38469
38470 Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
38471 InFmt_MIMG *iFmt)
38472 : Inst_MIMG(iFmt, "image_sample_c_d_cl")
38473 {
38474 setFlag(GlobalSegment);
38475 } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL
38476
38477 Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
38478 {
38479 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL
38480
38481 void
38482 Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
38483 {
38484 panicUnimplemented();
38485 }
38486
38487 Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
38488 : Inst_MIMG(iFmt, "image_sample_c_l")
38489 {
38490 setFlag(GlobalSegment);
38491 } // Inst_MIMG__IMAGE_SAMPLE_C_L
38492
38493 Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
38494 {
38495 } // ~Inst_MIMG__IMAGE_SAMPLE_C_L
38496
38497 void
38498 Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
38499 {
38500 panicUnimplemented();
38501 }
38502
38503 Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
38504 : Inst_MIMG(iFmt, "image_sample_c_b")
38505 {
38506 setFlag(GlobalSegment);
38507 } // Inst_MIMG__IMAGE_SAMPLE_C_B
38508
38509 Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
38510 {
38511 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B
38512
38513 void
38514 Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
38515 {
38516 panicUnimplemented();
38517 }
38518
38519 Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
38520 InFmt_MIMG *iFmt)
38521 : Inst_MIMG(iFmt, "image_sample_c_b_cl")
38522 {
38523 setFlag(GlobalSegment);
38524 } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL
38525
38526 Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
38527 {
38528 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL
38529
38530 void
38531 Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
38532 {
38533 panicUnimplemented();
38534 }
38535
38536 Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
38537 InFmt_MIMG *iFmt)
38538 : Inst_MIMG(iFmt, "image_sample_c_lz")
38539 {
38540 setFlag(GlobalSegment);
38541 } // Inst_MIMG__IMAGE_SAMPLE_C_LZ
38542
38543 Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
38544 {
38545 } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ
38546
38547 void
38548 Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
38549 {
38550 panicUnimplemented();
38551 }
38552
38553 Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
38554 : Inst_MIMG(iFmt, "image_sample_o")
38555 {
38556 setFlag(GlobalSegment);
38557 } // Inst_MIMG__IMAGE_SAMPLE_O
38558
38559 Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
38560 {
38561 } // ~Inst_MIMG__IMAGE_SAMPLE_O
38562
38563 void
38564 Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
38565 {
38566 panicUnimplemented();
38567 }
38568
38569 Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
38570 InFmt_MIMG *iFmt)
38571 : Inst_MIMG(iFmt, "image_sample_cl_o")
38572 {
38573 setFlag(GlobalSegment);
38574 } // Inst_MIMG__IMAGE_SAMPLE_CL_O
38575
38576 Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
38577 {
38578 } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O
38579
38580 void
38581 Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
38582 {
38583 panicUnimplemented();
38584 }
38585
38586 Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
38587 : Inst_MIMG(iFmt, "image_sample_d_o")
38588 {
38589 setFlag(GlobalSegment);
38590 } // Inst_MIMG__IMAGE_SAMPLE_D_O
38591
38592 Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
38593 {
38594 } // ~Inst_MIMG__IMAGE_SAMPLE_D_O
38595
38596 void
38597 Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
38598 {
38599 panicUnimplemented();
38600 }
38601
38602 Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
38603 InFmt_MIMG *iFmt)
38604 : Inst_MIMG(iFmt, "image_sample_d_cl_o")
38605 {
38606 setFlag(GlobalSegment);
38607 } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O
38608
38609 Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
38610 {
38611 } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O
38612
38613 void
38614 Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
38615 {
38616 panicUnimplemented();
38617 }
38618
38619 Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
38620 : Inst_MIMG(iFmt, "image_sample_l_o")
38621 {
38622 setFlag(GlobalSegment);
38623 } // Inst_MIMG__IMAGE_SAMPLE_L_O
38624
38625 Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
38626 {
38627 } // ~Inst_MIMG__IMAGE_SAMPLE_L_O
38628
38629 void
38630 Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
38631 {
38632 panicUnimplemented();
38633 }
38634
38635 Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
38636 : Inst_MIMG(iFmt, "image_sample_b_o")
38637 {
38638 setFlag(GlobalSegment);
38639 } // Inst_MIMG__IMAGE_SAMPLE_B_O
38640
38641 Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
38642 {
38643 } // ~Inst_MIMG__IMAGE_SAMPLE_B_O
38644
38645 void
38646 Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
38647 {
38648 panicUnimplemented();
38649 }
38650
38651 Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
38652 InFmt_MIMG *iFmt)
38653 : Inst_MIMG(iFmt, "image_sample_b_cl_o")
38654 {
38655 setFlag(GlobalSegment);
38656 } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O
38657
38658 Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
38659 {
38660 } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O
38661
38662 void
38663 Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
38664 {
38665 panicUnimplemented();
38666 }
38667
38668 Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
38669 InFmt_MIMG *iFmt)
38670 : Inst_MIMG(iFmt, "image_sample_lz_o")
38671 {
38672 setFlag(GlobalSegment);
38673 } // Inst_MIMG__IMAGE_SAMPLE_LZ_O
38674
38675 Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
38676 {
38677 } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O
38678
38679 void
38680 Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
38681 {
38682 panicUnimplemented();
38683 }
38684
38685 Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
38686 : Inst_MIMG(iFmt, "image_sample_c_o")
38687 {
38688 setFlag(GlobalSegment);
38689 } // Inst_MIMG__IMAGE_SAMPLE_C_O
38690
38691 Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
38692 {
38693 } // ~Inst_MIMG__IMAGE_SAMPLE_C_O
38694
38695 void
38696 Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
38697 {
38698 panicUnimplemented();
38699 }
38700
38701 Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
38702 InFmt_MIMG *iFmt)
38703 : Inst_MIMG(iFmt, "image_sample_c_cl_o")
38704 {
38705 setFlag(GlobalSegment);
38706 } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O
38707
38708 Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
38709 {
38710 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O
38711
38712 void
38713 Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
38714 {
38715 panicUnimplemented();
38716 }
38717
38718 Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
38719 InFmt_MIMG *iFmt)
38720 : Inst_MIMG(iFmt, "image_sample_c_d_o")
38721 {
38722 setFlag(GlobalSegment);
38723 } // Inst_MIMG__IMAGE_SAMPLE_C_D_O
38724
38725 Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
38726 {
38727 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O
38728
38729 void
38730 Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
38731 {
38732 panicUnimplemented();
38733 }
38734
38735 Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
38736 InFmt_MIMG *iFmt)
38737 : Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
38738 {
38739 setFlag(GlobalSegment);
38740 } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
38741
38742 Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
38743 {
38744 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
38745
38746 void
38747 Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
38748 {
38749 panicUnimplemented();
38750 }
38751
38752 Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
38753 InFmt_MIMG *iFmt)
38754 : Inst_MIMG(iFmt, "image_sample_c_l_o")
38755 {
38756 setFlag(GlobalSegment);
38757 } // Inst_MIMG__IMAGE_SAMPLE_C_L_O
38758
38759 Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
38760 {
38761 } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O
38762
38763 void
38764 Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
38765 {
38766 panicUnimplemented();
38767 }
38768
38769 Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
38770 InFmt_MIMG *iFmt)
38771 : Inst_MIMG(iFmt, "image_sample_c_b_o")
38772 {
38773 setFlag(GlobalSegment);
38774 } // Inst_MIMG__IMAGE_SAMPLE_C_B_O
38775
38776 Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
38777 {
38778 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O
38779
38780 void
38781 Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst)
38782 {
38783 panicUnimplemented();
38784 }
38785
38786 Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(
38787 InFmt_MIMG *iFmt)
38788 : Inst_MIMG(iFmt, "image_sample_c_b_cl_o")
38789 {
38790 setFlag(GlobalSegment);
38791 } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
38792
38793 Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O()
38794 {
38795 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
38796
38797 void
38798 Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
38799 {
38800 panicUnimplemented();
38801 }
38802
38803 Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
38804 InFmt_MIMG *iFmt)
38805 : Inst_MIMG(iFmt, "image_sample_c_lz_o")
38806 {
38807 setFlag(GlobalSegment);
38808 } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
38809
38810 Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
38811 {
38812 } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
38813
38814 void
38815 Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
38816 {
38817 panicUnimplemented();
38818 }
38819
38820 Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
38821 : Inst_MIMG(iFmt, "image_gather4")
38822 {
38823 setFlag(GlobalSegment);
38824 } // Inst_MIMG__IMAGE_GATHER4
38825
38826 Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
38827 {
38828 } // ~Inst_MIMG__IMAGE_GATHER4
38829
38830 void
38831 Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
38832 {
38833 panicUnimplemented();
38834 }
38835
38836 Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
38837 : Inst_MIMG(iFmt, "image_gather4_cl")
38838 {
38839 setFlag(GlobalSegment);
38840 } // Inst_MIMG__IMAGE_GATHER4_CL
38841
38842 Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
38843 {
38844 } // ~Inst_MIMG__IMAGE_GATHER4_CL
38845
38846 void
38847 Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
38848 {
38849 panicUnimplemented();
38850 }
38851
38852 Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
38853 : Inst_MIMG(iFmt, "image_gather4_l")
38854 {
38855 setFlag(GlobalSegment);
38856 } // Inst_MIMG__IMAGE_GATHER4_L
38857
38858 Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
38859 {
38860 } // ~Inst_MIMG__IMAGE_GATHER4_L
38861
38862 void
38863 Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
38864 {
38865 panicUnimplemented();
38866 }
38867
38868 Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
38869 : Inst_MIMG(iFmt, "image_gather4_b")
38870 {
38871 setFlag(GlobalSegment);
38872 } // Inst_MIMG__IMAGE_GATHER4_B
38873
38874 Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
38875 {
38876 } // ~Inst_MIMG__IMAGE_GATHER4_B
38877
38878 void
38879 Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
38880 {
38881 panicUnimplemented();
38882 }
38883
38884 Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
38885 InFmt_MIMG *iFmt)
38886 : Inst_MIMG(iFmt, "image_gather4_b_cl")
38887 {
38888 setFlag(GlobalSegment);
38889 } // Inst_MIMG__IMAGE_GATHER4_B_CL
38890
38891 Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
38892 {
38893 } // ~Inst_MIMG__IMAGE_GATHER4_B_CL
38894
38895 void
38896 Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
38897 {
38898 panicUnimplemented();
38899 }
38900
38901 Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
38902 : Inst_MIMG(iFmt, "image_gather4_lz")
38903 {
38904 setFlag(GlobalSegment);
38905 } // Inst_MIMG__IMAGE_GATHER4_LZ
38906
38907 Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
38908 {
38909 } // ~Inst_MIMG__IMAGE_GATHER4_LZ
38910
38911 void
38912 Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
38913 {
38914 panicUnimplemented();
38915 }
38916
38917 Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
38918 : Inst_MIMG(iFmt, "image_gather4_c")
38919 {
38920 setFlag(GlobalSegment);
38921 } // Inst_MIMG__IMAGE_GATHER4_C
38922
38923 Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
38924 {
38925 } // ~Inst_MIMG__IMAGE_GATHER4_C
38926
38927 void
38928 Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
38929 {
38930 panicUnimplemented();
38931 }
38932
38933 Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
38934 InFmt_MIMG *iFmt)
38935 : Inst_MIMG(iFmt, "image_gather4_c_cl")
38936 {
38937 setFlag(GlobalSegment);
38938 } // Inst_MIMG__IMAGE_GATHER4_C_CL
38939
38940 Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
38941 {
38942 } // ~Inst_MIMG__IMAGE_GATHER4_C_CL
38943
38944 void
38945 Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
38946 {
38947 panicUnimplemented();
38948 }
38949
38950 Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
38951 InFmt_MIMG *iFmt)
38952 : Inst_MIMG(iFmt, "image_gather4_c_l")
38953 {
38954 setFlag(GlobalSegment);
38955 } // Inst_MIMG__IMAGE_GATHER4_C_L
38956
38957 Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
38958 {
38959 } // ~Inst_MIMG__IMAGE_GATHER4_C_L
38960
38961 void
38962 Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
38963 {
38964 panicUnimplemented();
38965 }
38966
38967 Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
38968 InFmt_MIMG *iFmt)
38969 : Inst_MIMG(iFmt, "image_gather4_c_b")
38970 {
38971 setFlag(GlobalSegment);
38972 } // Inst_MIMG__IMAGE_GATHER4_C_B
38973
38974 Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
38975 {
38976 } // ~Inst_MIMG__IMAGE_GATHER4_C_B
38977
38978 void
38979 Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
38980 {
38981 panicUnimplemented();
38982 }
38983
38984 Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
38985 InFmt_MIMG *iFmt)
38986 : Inst_MIMG(iFmt, "image_gather4_c_b_cl")
38987 {
38988 setFlag(GlobalSegment);
38989 } // Inst_MIMG__IMAGE_GATHER4_C_B_CL
38990
38991 Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
38992 {
38993 } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL
38994
38995 void
38996 Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
38997 {
38998 panicUnimplemented();
38999 }
39000
39001 Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
39002 InFmt_MIMG *iFmt)
39003 : Inst_MIMG(iFmt, "image_gather4_c_lz")
39004 {
39005 setFlag(GlobalSegment);
39006 } // Inst_MIMG__IMAGE_GATHER4_C_LZ
39007
39008 Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
39009 {
39010 } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ
39011
39012 void
39013 Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
39014 {
39015 panicUnimplemented();
39016 }
39017
39018 Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
39019 : Inst_MIMG(iFmt, "image_gather4_o")
39020 {
39021 setFlag(GlobalSegment);
39022 } // Inst_MIMG__IMAGE_GATHER4_O
39023
39024 Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
39025 {
39026 } // ~Inst_MIMG__IMAGE_GATHER4_O
39027
39028 void
39029 Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
39030 {
39031 panicUnimplemented();
39032 }
39033
39034 Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
39035 InFmt_MIMG *iFmt)
39036 : Inst_MIMG(iFmt, "image_gather4_cl_o")
39037 {
39038 setFlag(GlobalSegment);
39039 } // Inst_MIMG__IMAGE_GATHER4_CL_O
39040
39041 Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
39042 {
39043 } // ~Inst_MIMG__IMAGE_GATHER4_CL_O
39044
39045 void
39046 Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
39047 {
39048 panicUnimplemented();
39049 }
39050
39051 Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
39052 InFmt_MIMG *iFmt)
39053 : Inst_MIMG(iFmt, "image_gather4_l_o")
39054 {
39055 setFlag(GlobalSegment);
39056 } // Inst_MIMG__IMAGE_GATHER4_L_O
39057
39058 Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
39059 {
39060 } // ~Inst_MIMG__IMAGE_GATHER4_L_O
39061
39062 void
39063 Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
39064 {
39065 panicUnimplemented();
39066 }
39067
39068 Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
39069 InFmt_MIMG *iFmt)
39070 : Inst_MIMG(iFmt, "image_gather4_b_o")
39071 {
39072 setFlag(GlobalSegment);
39073 } // Inst_MIMG__IMAGE_GATHER4_B_O
39074
39075 Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
39076 {
39077 } // ~Inst_MIMG__IMAGE_GATHER4_B_O
39078
39079 void
39080 Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
39081 {
39082 panicUnimplemented();
39083 }
39084
39085 Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
39086 InFmt_MIMG *iFmt)
39087 : Inst_MIMG(iFmt, "image_gather4_b_cl_o")
39088 {
39089 setFlag(GlobalSegment);
39090 } // Inst_MIMG__IMAGE_GATHER4_B_CL_O
39091
39092 Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
39093 {
39094 } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O
39095
39096 void
39097 Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
39098 {
39099 panicUnimplemented();
39100 }
39101
39102 Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
39103 InFmt_MIMG *iFmt)
39104 : Inst_MIMG(iFmt, "image_gather4_lz_o")
39105 {
39106 setFlag(GlobalSegment);
39107 } // Inst_MIMG__IMAGE_GATHER4_LZ_O
39108
39109 Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
39110 {
39111 } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O
39112
39113 void
39114 Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
39115 {
39116 panicUnimplemented();
39117 }
39118
39119 Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
39120 InFmt_MIMG *iFmt)
39121 : Inst_MIMG(iFmt, "image_gather4_c_o")
39122 {
39123 setFlag(GlobalSegment);
39124 } // Inst_MIMG__IMAGE_GATHER4_C_O
39125
39126 Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
39127 {
39128 } // ~Inst_MIMG__IMAGE_GATHER4_C_O
39129
39130 void
39131 Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
39132 {
39133 panicUnimplemented();
39134 }
39135
39136 Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
39137 InFmt_MIMG *iFmt)
39138 : Inst_MIMG(iFmt, "image_gather4_c_cl_o")
39139 {
39140 setFlag(GlobalSegment);
39141 } // Inst_MIMG__IMAGE_GATHER4_C_CL_O
39142
39143 Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
39144 {
39145 } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O
39146
39147 void
39148 Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
39149 {
39150 panicUnimplemented();
39151 }
39152
39153 Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
39154 InFmt_MIMG *iFmt)
39155 : Inst_MIMG(iFmt, "image_gather4_c_l_o")
39156 {
39157 setFlag(GlobalSegment);
39158 } // Inst_MIMG__IMAGE_GATHER4_C_L_O
39159
39160 Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
39161 {
39162 } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O
39163
39164 void
39165 Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst)
39166 {
39167 panicUnimplemented();
39168 }
39169
39170 Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O(
39171 InFmt_MIMG *iFmt)
39172 : Inst_MIMG(iFmt, "image_gather4_c_b_o")
39173 {
39174 setFlag(GlobalSegment);
39175 } // Inst_MIMG__IMAGE_GATHER4_C_B_O
39176
39177 Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O()
39178 {
39179 } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O
39180
39181 void
39182 Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst)
39183 {
39184 panicUnimplemented();
39185 }
39186
39187 Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(
39188 InFmt_MIMG *iFmt)
39189 : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o")
39190 {
39191 setFlag(GlobalSegment);
39192 } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O
39193
39194 Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O()
39195 {
39196 } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O
39197
39198 void
39199 Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
39200 {
39201 panicUnimplemented();
39202 }
39203
39204 Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O(
39205 InFmt_MIMG *iFmt)
39206 : Inst_MIMG(iFmt, "image_gather4_c_lz_o")
39207 {
39208 setFlag(GlobalSegment);
39209 } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O
39210
39211 Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O()
39212 {
39213 } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O
39214
39215 void
39216 Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
39217 {
39218 panicUnimplemented();
39219 }
39220
39221 Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt)
39222 : Inst_MIMG(iFmt, "image_get_lod")
39223 {
39224 setFlag(GlobalSegment);
39225 } // Inst_MIMG__IMAGE_GET_LOD
39226
39227 Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD()
39228 {
39229 } // ~Inst_MIMG__IMAGE_GET_LOD
39230
39231 void
39232 Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst)
39233 {
39234 panicUnimplemented();
39235 }
39236
39237 Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt)
39238 : Inst_MIMG(iFmt, "image_sample_cd")
39239 {
39240 setFlag(GlobalSegment);
39241 } // Inst_MIMG__IMAGE_SAMPLE_CD
39242
39243 Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD()
39244 {
39245 } // ~Inst_MIMG__IMAGE_SAMPLE_CD
39246
39247 void
39248 Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst)
39249 {
39250 panicUnimplemented();
39251 }
39252
39253 Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL(
39254 InFmt_MIMG *iFmt)
39255 : Inst_MIMG(iFmt, "image_sample_cd_cl")
39256 {
39257 setFlag(GlobalSegment);
39258 } // Inst_MIMG__IMAGE_SAMPLE_CD_CL
39259
39260 Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL()
39261 {
39262 } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL
39263
39264 void
39265 Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst)
39266 {
39267 panicUnimplemented();
39268 }
39269
39270 Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD(
39271 InFmt_MIMG *iFmt)
39272 : Inst_MIMG(iFmt, "image_sample_c_cd")
39273 {
39274 setFlag(GlobalSegment);
39275 } // Inst_MIMG__IMAGE_SAMPLE_C_CD
39276
39277 Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD()
39278 {
39279 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD
39280
39281 void
39282 Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst)
39283 {
39284 panicUnimplemented();
39285 }
39286
39287 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(
39288 InFmt_MIMG *iFmt)
39289 : Inst_MIMG(iFmt, "image_sample_c_cd_cl")
39290 {
39291 setFlag(GlobalSegment);
39292 } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL
39293
39294 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL()
39295 {
39296 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL
39297
39298 void
39299 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst)
39300 {
39301 panicUnimplemented();
39302 }
39303
39304 Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O(
39305 InFmt_MIMG *iFmt)
39306 : Inst_MIMG(iFmt, "image_sample_cd_o")
39307 {
39308 setFlag(GlobalSegment);
39309 } // Inst_MIMG__IMAGE_SAMPLE_CD_O
39310
39311 Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O()
39312 {
39313 } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O
39314
39315 void
39316 Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst)
39317 {
39318 panicUnimplemented();
39319 }
39320
39321 Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(
39322 InFmt_MIMG *iFmt)
39323 : Inst_MIMG(iFmt, "image_sample_cd_cl_o")
39324 {
39325 setFlag(GlobalSegment);
39326 } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O
39327
39328 Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O()
39329 {
39330 } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O
39331
39332 void
39333 Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
39334 {
39335 panicUnimplemented();
39336 }
39337
39338 Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O(
39339 InFmt_MIMG *iFmt)
39340 : Inst_MIMG(iFmt, "image_sample_c_cd_o")
39341 {
39342 setFlag(GlobalSegment);
39343 } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O
39344
39345 Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O()
39346 {
39347 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O
39348
39349 void
39350 Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst)
39351 {
39352 panicUnimplemented();
39353 }
39354
39355 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(
39356 InFmt_MIMG *iFmt)
39357 : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o")
39358 {
39359 setFlag(GlobalSegment);
39360 } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O
39361
39362 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O()
39363 {
39364 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O
39365
39366 void
39367 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
39368 {
39369 panicUnimplemented();
39370 }
39371
39372 Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt)
39373 : Inst_EXP(iFmt, "exp")
39374 {
39375 } // Inst_EXP__EXP
39376
39377 Inst_EXP__EXP::~Inst_EXP__EXP()
39378 {
39379 } // ~Inst_EXP__EXP
39380
39381 void
39382 Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst)
39383 {
39384 panicUnimplemented();
39385 }
39386
39387 Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt)
39388 : Inst_FLAT(iFmt, "flat_load_ubyte")
39389 {
39390 setFlag(MemoryRef);
39391 setFlag(Load);
39392 } // Inst_FLAT__FLAT_LOAD_UBYTE
39393
39394 Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE()
39395 {
39396 } // ~Inst_FLAT__FLAT_LOAD_UBYTE
39397
39398 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
39399 void
39400 Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
39401 {
39402 Wavefront *wf = gpuDynInst->wavefront();
39403
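              // If no lanes are active, roll back the issue-time
              // bookkeeping (the instruction was already counted as
              // issued) and skip the memory request entirely.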
39404 if (wf->execMask().none()) {
39405 wf->decVMemInstsIssued();
39406 wf->decLGKMInstsIssued();
39407 wf->rdGmReqsInPipe--;
39408 wf->rdLmReqsInPipe--;
39409 return;
39410 }
39411
39412 gpuDynInst->execUnitId = wf->execUnitId;
39413 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39414 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39415 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39416
39417 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39418
39419 addr.read();
39420
39421 calcAddr(gpuDynInst, addr);
39422
39423 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39424 gpuDynInst->computeUnit()->globalMemoryPipe
39425 .issueRequest(gpuDynInst);
39426 wf->rdGmReqsInPipe--;
39427 wf->outstandingReqsRdGm++;
39428 } else {
39429 fatal("Non global flat instructions not implemented yet.\n");
39430 }
39431
39432 gpuDynInst->wavefront()->outstandingReqs++;
39433 gpuDynInst->wavefront()->validateRequestCounters();
39434 } // execute
39435
39436 void
39437 Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
39438 {
39439 initMemRead<VecElemU8>(gpuDynInst);
39440 } // initiateAcc
39441
39442 void
39443 Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
39444 {
39445 VecOperandU32 vdst(gpuDynInst, extData.VDST);
39446
39447 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39448 if (gpuDynInst->exec_mask[lane]) {
39449 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
39450 gpuDynInst->d_data))[lane]);
39451 }
39452 }
39453 vdst.write();
39454         } // completeAcc
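
              // FLAT loads in this model are split across three hooks:
              // execute() computes per-lane addresses and issues the
              // request to the global memory pipeline, initiateAcc()
              // starts the typed read into gpuDynInst->d_data, and
              // completeAcc() copies the returned bytes into the
              // destination VGPRs, widening as needed. For
              // flat_load_ubyte the widening is a zero extension; a
              // per-lane sketch (illustrative only):
              //
              //     VecElemU8 *bytes =
              //         reinterpret_cast<VecElemU8*>(gpuDynInst->d_data);
              //     vdst[lane] = VecElemU32(bytes[lane]); // zero extend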
39455 // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---
39456
39457 Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt)
39458 : Inst_FLAT(iFmt, "flat_load_sbyte")
39459 {
39460 setFlag(MemoryRef);
39461 setFlag(Load);
39462 } // Inst_FLAT__FLAT_LOAD_SBYTE
39463
39464 Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE()
39465 {
39466 } // ~Inst_FLAT__FLAT_LOAD_SBYTE
39467
39468 // Untyped buffer load signed byte (sign extend to VGPR destination).
39469 void
39470 Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
39471 {
39472 panicUnimplemented();
39473 }
39474
39475 void
39476 Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
39477 {
39478 } // initiateAcc
39479
39480 void
39481 Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
39482 {
39483 }
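
              // flat_load_sbyte is still a stub here; when implemented,
              // the completeAcc widening would presumably be a sign
              // extension, e.g. (a sketch, not this file's code):
              //
              //     vdst[lane] = VecElemU32(int32_t(reinterpret_cast<
              //         int8_t*>(gpuDynInst->d_data)[lane]));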
39484
39485 Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
39486 : Inst_FLAT(iFmt, "flat_load_ushort")
39487 {
39488 setFlag(MemoryRef);
39489 setFlag(Load);
39490 } // Inst_FLAT__FLAT_LOAD_USHORT
39491
39492 Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT()
39493 {
39494 } // ~Inst_FLAT__FLAT_LOAD_USHORT
39495
39496 // Untyped buffer load unsigned short (zero extend to VGPR destination).
39497 void
39498 Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
39499 {
39500 Wavefront *wf = gpuDynInst->wavefront();
39501
39502 if (wf->execMask().none()) {
39503 wf->decVMemInstsIssued();
39504 wf->decLGKMInstsIssued();
39505 wf->rdGmReqsInPipe--;
39506 wf->rdLmReqsInPipe--;
39507 return;
39508 }
39509
39510 gpuDynInst->execUnitId = wf->execUnitId;
39511 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39512 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39513 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39514
39515 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39516
39517 addr.read();
39518
39519 calcAddr(gpuDynInst, addr);
39520
39521 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39522 gpuDynInst->computeUnit()->globalMemoryPipe
39523 .issueRequest(gpuDynInst);
39524 wf->rdGmReqsInPipe--;
39525 wf->outstandingReqsRdGm++;
39526 } else {
39527 fatal("Non global flat instructions not implemented yet.\n");
39528 }
39529
39530 gpuDynInst->wavefront()->outstandingReqs++;
39531 gpuDynInst->wavefront()->validateRequestCounters();
39532 }
39533
39534 void
39535 Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
39536 {
39537 initMemRead<VecElemU16>(gpuDynInst);
39538 } // initiateAcc
39539
39540 void
39541 Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
39542 {
39543 VecOperandU32 vdst(gpuDynInst, extData.VDST);
39544
39545 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39546 if (gpuDynInst->exec_mask[lane]) {
39547 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
39548 gpuDynInst->d_data))[lane]);
39549 }
39550 }
39551 vdst.write();
39552 }
39553
39554
39555 Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt)
39556 : Inst_FLAT(iFmt, "flat_load_sshort")
39557 {
39558 setFlag(MemoryRef);
39559 setFlag(Load);
39560 } // Inst_FLAT__FLAT_LOAD_SSHORT
39561
39562 Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT()
39563 {
39564 } // ~Inst_FLAT__FLAT_LOAD_SSHORT
39565
39566 // Untyped buffer load signed short (sign extend to VGPR destination).
39567 void
39568 Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
39569 {
39570 panicUnimplemented();
39571 }
39572
39573 void
39574 Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
39575 {
39576 } // initiateAcc
39577
39578 void
39579 Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
39580 {
39581 }
39582
39583 Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt)
39584 : Inst_FLAT(iFmt, "flat_load_dword")
39585 {
39586 setFlag(MemoryRef);
39587 setFlag(Load);
39588 } // Inst_FLAT__FLAT_LOAD_DWORD
39589
39590 Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD()
39591 {
39592 } // ~Inst_FLAT__FLAT_LOAD_DWORD
39593
39594 // Untyped buffer load dword.
39595 void
39596 Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
39597 {
39598 Wavefront *wf = gpuDynInst->wavefront();
39599
39600 if (wf->execMask().none()) {
39601 wf->decVMemInstsIssued();
39602 wf->decLGKMInstsIssued();
39603 wf->rdGmReqsInPipe--;
39604 wf->rdLmReqsInPipe--;
39605 return;
39606 }
39607
39608 gpuDynInst->execUnitId = wf->execUnitId;
39609 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39610 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39611 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39612
39613 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39614
39615 addr.read();
39616
39617 calcAddr(gpuDynInst, addr);
39618
39619 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39620 gpuDynInst->computeUnit()->globalMemoryPipe
39621 .issueRequest(gpuDynInst);
39622 wf->rdGmReqsInPipe--;
39623 wf->outstandingReqsRdGm++;
39624 } else {
39625 fatal("Non global flat instructions not implemented yet.\n");
39626 }
39627
39628 gpuDynInst->wavefront()->outstandingReqs++;
39629 gpuDynInst->wavefront()->validateRequestCounters();
39630 }
39631
39632 void
39633 Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
39634 {
39635 initMemRead<VecElemU32>(gpuDynInst);
39636 } // initiateAcc
39637
39638 void
39639 Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
39640 {
39641 VecOperandU32 vdst(gpuDynInst, extData.VDST);
39642
39643 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39644 if (gpuDynInst->exec_mask[lane]) {
39645 vdst[lane] = (reinterpret_cast<VecElemU32*>(
39646 gpuDynInst->d_data))[lane];
39647 }
39648 }
39649 vdst.write();
39650 } // completeAcc
39651
39652 Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2(
39653 InFmt_FLAT *iFmt)
39654 : Inst_FLAT(iFmt, "flat_load_dwordx2")
39655 {
39656 setFlag(MemoryRef);
39657 setFlag(Load);
39658 } // Inst_FLAT__FLAT_LOAD_DWORDX2
39659
39660 Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2()
39661 {
39662 } // ~Inst_FLAT__FLAT_LOAD_DWORDX2
39663
39664 // Untyped buffer load 2 dwords.
39665 void
39666 Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
39667 {
39668 Wavefront *wf = gpuDynInst->wavefront();
39669
39670 if (wf->execMask().none()) {
39671 wf->decVMemInstsIssued();
39672 wf->decLGKMInstsIssued();
39673 wf->rdGmReqsInPipe--;
39674 wf->rdLmReqsInPipe--;
39675 return;
39676 }
39677
39678 gpuDynInst->execUnitId = wf->execUnitId;
39679 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39680 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39681 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39682
39683 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39684
39685 addr.read();
39686
39687 calcAddr(gpuDynInst, addr);
39688
39689 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39690 gpuDynInst->computeUnit()->globalMemoryPipe
39691 .issueRequest(gpuDynInst);
39692 wf->rdGmReqsInPipe--;
39693 wf->outstandingReqsRdGm++;
39694 } else {
39695 fatal("Non global flat instructions not implemented yet.\n");
39696 }
39697
39698 gpuDynInst->wavefront()->outstandingReqs++;
39699 gpuDynInst->wavefront()->validateRequestCounters();
39700 }
39701
39702 void
39703 Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
39704 {
39705 initMemRead<VecElemU64>(gpuDynInst);
39706 } // initiateAcc
39707
39708 void
39709 Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
39710 {
39711 VecOperandU64 vdst(gpuDynInst, extData.VDST);
39712
39713 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39714 if (gpuDynInst->exec_mask[lane]) {
39715 vdst[lane] = (reinterpret_cast<VecElemU64*>(
39716 gpuDynInst->d_data))[lane];
39717 }
39718 }
39719 vdst.write();
39720 } // completeAcc
39721
39722 Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3(
39723 InFmt_FLAT *iFmt)
39724 : Inst_FLAT(iFmt, "flat_load_dwordx3")
39725 {
39726 setFlag(MemoryRef);
39727 setFlag(Load);
39728 } // Inst_FLAT__FLAT_LOAD_DWORDX3
39729
39730 Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3()
39731 {
39732 } // ~Inst_FLAT__FLAT_LOAD_DWORDX3
39733
39734 // Untyped buffer load 3 dwords.
39735 void
39736 Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
39737 {
39738 Wavefront *wf = gpuDynInst->wavefront();
39739
39740 if (wf->execMask().none()) {
39741 wf->decVMemInstsIssued();
39742 wf->decLGKMInstsIssued();
39743 wf->rdGmReqsInPipe--;
39744 wf->rdLmReqsInPipe--;
39745 return;
39746 }
39747
39748 gpuDynInst->execUnitId = wf->execUnitId;
39749 gpuDynInst->exec_mask = wf->execMask();
39750 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39751 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39752
39753 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39754
39755 addr.read();
39756
39757 calcAddr(gpuDynInst, addr);
39758
39759 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39760 gpuDynInst->computeUnit()->globalMemoryPipe
39761 .issueRequest(gpuDynInst);
39762 wf->rdGmReqsInPipe--;
39763 wf->outstandingReqsRdGm++;
39764 } else {
39765 fatal("Non global flat instructions not implemented yet.\n");
39766 }
39767
39768 gpuDynInst->wavefront()->outstandingReqs++;
39769 gpuDynInst->wavefront()->validateRequestCounters();
39770 }
39771
39772 void
39773 Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
39774 {
39775 initMemRead<3>(gpuDynInst);
39776 } // initiateAcc
39777
39778 void
39779 Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
39780 {
39781 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
39782 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
39783 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
39784
39785 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39786 if (gpuDynInst->exec_mask[lane]) {
39787 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
39788 gpuDynInst->d_data))[lane * 3];
39789 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
39790 gpuDynInst->d_data))[lane * 3 + 1];
39791 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
39792 gpuDynInst->d_data))[lane * 3 + 2];
39793 }
39794 }
39795
39796 vdst0.write();
39797 vdst1.write();
39798 vdst2.write();
39799 } // completeAcc
39800
39801 Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4(
39802 InFmt_FLAT *iFmt)
39803 : Inst_FLAT(iFmt, "flat_load_dwordx4")
39804 {
39805 setFlag(MemoryRef);
39806 setFlag(Load);
39807 } // Inst_FLAT__FLAT_LOAD_DWORDX4
39808
39809 Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4()
39810 {
39811 } // ~Inst_FLAT__FLAT_LOAD_DWORDX4
39812
39813 // Untyped buffer load 4 dwords.
39814 void
39815 Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
39816 {
39817 Wavefront *wf = gpuDynInst->wavefront();
39818
39819 if (wf->execMask().none()) {
39820 wf->decVMemInstsIssued();
39821 wf->decLGKMInstsIssued();
39822 wf->rdGmReqsInPipe--;
39823 wf->rdLmReqsInPipe--;
                  return;
39824         }
39825
39826 gpuDynInst->execUnitId = wf->execUnitId;
39827 gpuDynInst->exec_mask = wf->execMask();
39828 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39829 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39830
39831 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39832
39833 addr.read();
39834
39835 calcAddr(gpuDynInst, addr);
39836
39837 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39838 gpuDynInst->computeUnit()->globalMemoryPipe
39839 .issueRequest(gpuDynInst);
39840 wf->rdGmReqsInPipe--;
39841 wf->outstandingReqsRdGm++;
39842 } else {
39843 fatal("Non global flat instructions not implemented yet.\n");
39844 }
39845
39846 gpuDynInst->wavefront()->outstandingReqs++;
39847 gpuDynInst->wavefront()->validateRequestCounters();
39848 }
39849
39850 void
39851 Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
39852 {
39853 initMemRead<4>(gpuDynInst);
39854 } // initiateAcc
39855
39856 void
39857 Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
39858 {
39859 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
39860 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
39861 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
39862 VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
39863
39864 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39865 if (gpuDynInst->exec_mask[lane]) {
39866 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
39867 gpuDynInst->d_data))[lane * 4];
39868 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
39869 gpuDynInst->d_data))[lane * 4 + 1];
39870 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
39871 gpuDynInst->d_data))[lane * 4 + 2];
39872 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
39873 gpuDynInst->d_data))[lane * 4 + 3];
39874 }
39875 }
39876
39877 vdst0.write();
39878 vdst1.write();
39879 vdst2.write();
39880 vdst3.write();
39881 } // completeAcc
39882
39883 Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt)
39884 : Inst_FLAT(iFmt, "flat_store_byte")
39885 {
39886 setFlag(MemoryRef);
39887 setFlag(Store);
39888 } // Inst_FLAT__FLAT_STORE_BYTE
39889
39890 Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE()
39891 {
39892 } // ~Inst_FLAT__FLAT_STORE_BYTE
39893
39894 // Untyped buffer store byte.
39895 void
39896 Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
39897 {
39898 Wavefront *wf = gpuDynInst->wavefront();
39899
39900 if (wf->execMask().none()) {
39901 wf->decVMemInstsIssued();
39902 wf->decLGKMInstsIssued();
39903 wf->wrGmReqsInPipe--;
39904 wf->wrLmReqsInPipe--;
39905 return;
39906 }
39907
39908 gpuDynInst->execUnitId = wf->execUnitId;
39909 gpuDynInst->exec_mask = wf->execMask();
39910 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39911 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39912
39913 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39914
39915 addr.read();
39916
39917 calcAddr(gpuDynInst, addr);
39918
39919 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39920 gpuDynInst->computeUnit()->globalMemoryPipe
39921 .issueRequest(gpuDynInst);
39922 wf->wrGmReqsInPipe--;
39923 wf->outstandingReqsWrGm++;
39924 } else {
39925 fatal("Non global flat instructions not implemented yet.\n");
39926 }
39927
39928 gpuDynInst->wavefront()->outstandingReqs++;
39929 gpuDynInst->wavefront()->validateRequestCounters();
39930 } // execute
39931
39932 void
39933 Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
39934 {
39935 ConstVecOperandU8 data(gpuDynInst, extData.DATA);
39936 data.read();
39937
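              // Stage the low byte of each active lane's DATA register in
              // d_data; initMemWrite<VecElemU8>() below then issues the
              // per-lane byte stores from that buffer.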
39938 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39939 if (gpuDynInst->exec_mask[lane]) {
39940 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
39941 = data[lane];
39942 }
39943 }
39944
39945 initMemWrite<VecElemU8>(gpuDynInst);
39946 } // initiateAcc
39947
39948 void
39949 Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
39950 {
39951 }
39952
39953 Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt)
39954 : Inst_FLAT(iFmt, "flat_store_short")
39955 {
39956 setFlag(MemoryRef);
39957 setFlag(Store);
39958 } // Inst_FLAT__FLAT_STORE_SHORT
39959
39960 Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT()
39961 {
39962 } // ~Inst_FLAT__FLAT_STORE_SHORT
39963
39964 // Untyped buffer store short.
39965 void
39966 Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
39967 {
39968 Wavefront *wf = gpuDynInst->wavefront();
39969
39970 if (wf->execMask().none()) {
39971 wf->decVMemInstsIssued();
39972 wf->decLGKMInstsIssued();
39973 wf->wrGmReqsInPipe--;
39974 wf->wrLmReqsInPipe--;
39975 return;
39976 }
39977
39978 gpuDynInst->execUnitId = wf->execUnitId;
39979 gpuDynInst->exec_mask = wf->execMask();
39980 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39981 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39982
39983 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39984
39985 addr.read();
39986
39987 calcAddr(gpuDynInst, addr);
39988
39989 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39990 gpuDynInst->computeUnit()->globalMemoryPipe
39991 .issueRequest(gpuDynInst);
39992 wf->wrGmReqsInPipe--;
39993 wf->outstandingReqsWrGm++;
39994 } else {
39995 fatal("Non global flat instructions not implemented yet.\n");
39996 }
39997
39998 gpuDynInst->wavefront()->outstandingReqs++;
39999 gpuDynInst->wavefront()->validateRequestCounters();
40000 }
40001
40002 void
40003 Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
40004 {
40005 ConstVecOperandU16 data(gpuDynInst, extData.DATA);
40006
40007 data.read();
40008
40009 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40010 if (gpuDynInst->exec_mask[lane]) {
40011 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
40012 = data[lane];
40013 }
40014 }
40015
40016 initMemWrite<VecElemU16>(gpuDynInst);
40017 } // initiateAcc
40018
40019 void
40020 Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
40021 {
40022 } // completeAcc
40023
40024 Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt)
40025 : Inst_FLAT(iFmt, "flat_store_dword")
40026 {
40027 setFlag(MemoryRef);
40028 setFlag(Store);
40029 } // Inst_FLAT__FLAT_STORE_DWORD
40030
40031 Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD()
40032 {
40033 } // ~Inst_FLAT__FLAT_STORE_DWORD
40034
40035 // Untyped buffer store dword.
40036 void
40037 Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
40038 {
40039 Wavefront *wf = gpuDynInst->wavefront();
40040
40041 if (wf->execMask().none()) {
40042 wf->decVMemInstsIssued();
40043 wf->decLGKMInstsIssued();
40044 wf->wrGmReqsInPipe--;
40045 wf->wrLmReqsInPipe--;
40046 return;
40047 }
40048
40049 gpuDynInst->execUnitId = wf->execUnitId;
40050 gpuDynInst->exec_mask = wf->execMask();
40051 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40052 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40053
40054 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40055
40056 addr.read();
40057
40058 calcAddr(gpuDynInst, addr);
40059
40060 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40061 gpuDynInst->computeUnit()->globalMemoryPipe
40062 .issueRequest(gpuDynInst);
40063 wf->wrGmReqsInPipe--;
40064 wf->outstandingReqsWrGm++;
40065 } else {
40066 fatal("Non global flat instructions not implemented yet.\n");
40067 }
40068
40069 gpuDynInst->wavefront()->outstandingReqs++;
40070 gpuDynInst->wavefront()->validateRequestCounters();
40071 }
40072
40073 void
40074 Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
40075 {
40076 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40077 data.read();
40078
40079 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40080 if (gpuDynInst->exec_mask[lane]) {
40081 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
40082 = data[lane];
40083 }
40084 }
40085
40086 initMemWrite<VecElemU32>(gpuDynInst);
40087 } // initiateAcc
40088
40089 void
40090 Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
40091 {
40092 } // completeAcc
40093
40094 Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2(
40095 InFmt_FLAT *iFmt)
40096 : Inst_FLAT(iFmt, "flat_store_dwordx2")
40097 {
40098 setFlag(MemoryRef);
40099 setFlag(Store);
40100 } // Inst_FLAT__FLAT_STORE_DWORDX2
40101
40102 Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2()
40103 {
40104 } // ~Inst_FLAT__FLAT_STORE_DWORDX2
40105
40106 // Untyped buffer store 2 dwords.
40107 void
40108 Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
40109 {
40110 Wavefront *wf = gpuDynInst->wavefront();
40111
40112 if (wf->execMask().none()) {
40113 wf->decVMemInstsIssued();
40114 wf->decLGKMInstsIssued();
40115 wf->wrGmReqsInPipe--;
40116 wf->wrLmReqsInPipe--;
40117 return;
40118 }
40119
40120 gpuDynInst->execUnitId = wf->execUnitId;
40121 gpuDynInst->exec_mask = wf->execMask();
40122 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40123 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40124
40125 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40126
40127 addr.read();
40128
40129 calcAddr(gpuDynInst, addr);
40130
40131 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40132 gpuDynInst->computeUnit()->globalMemoryPipe
40133 .issueRequest(gpuDynInst);
40134 wf->wrGmReqsInPipe--;
40135 wf->outstandingReqsWrGm++;
40136 } else {
40137 fatal("Non global flat instructions not implemented yet.\n");
40138 }
40139
40140 wf->outstandingReqs++;
40141 wf->validateRequestCounters();
40142 }
40143
40144 void
40145 Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
40146 {
40147 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
40148 data.read();
40149
40150 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40151 if (gpuDynInst->exec_mask[lane]) {
40152 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
40153 = data[lane];
40154 }
40155 }
40156
40157 initMemWrite<VecElemU64>(gpuDynInst);
40158 } // initiateAcc
40159
40160 void
40161 Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
40162 {
40163 } // completeAcc
40164
40165 Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3(
40166 InFmt_FLAT *iFmt)
40167 : Inst_FLAT(iFmt, "flat_store_dwordx3")
40168 {
40169 setFlag(MemoryRef);
40170 setFlag(Store);
40171 } // Inst_FLAT__FLAT_STORE_DWORDX3
40172
40173 Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3()
40174 {
40175 } // ~Inst_FLAT__FLAT_STORE_DWORDX3
40176
40177 // Untyped buffer store 3 dwords.
40178 void
40179 Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
40180 {
40181 Wavefront *wf = gpuDynInst->wavefront();
40182
40183 if (wf->execMask().none()) {
40184 wf->decVMemInstsIssued();
40185 wf->decLGKMInstsIssued();
40186 wf->wrGmReqsInPipe--;
40187 wf->wrLmReqsInPipe--;
40188 return;
40189 }
40190
40191 gpuDynInst->execUnitId = wf->execUnitId;
40192 gpuDynInst->exec_mask = wf->execMask();
40193 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40194 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40195
40196 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40197
40198 addr.read();
40199
40200 calcAddr(gpuDynInst, addr);
40201
40202 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40203 gpuDynInst->computeUnit()->globalMemoryPipe
40204 .issueRequest(gpuDynInst);
40205 wf->wrGmReqsInPipe--;
40206 wf->outstandingReqsWrGm++;
40207 } else {
40208 fatal("Non global flat instructions not implemented yet.\n");
40209 }
40210
40211 gpuDynInst->wavefront()->outstandingReqs++;
40212 gpuDynInst->wavefront()->validateRequestCounters();
40213 }
40214
40215 void
40216 Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
40217 {
40218 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
40219 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
40220 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
40221
40222 data0.read();
40223 data1.read();
40224 data2.read();
40225
40226 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40227 if (gpuDynInst->exec_mask[lane]) {
40228 (reinterpret_cast<VecElemU32*>(
40229 gpuDynInst->d_data))[lane * 3] = data0[lane];
40230 (reinterpret_cast<VecElemU32*>(
40231 gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
40232 (reinterpret_cast<VecElemU32*>(
40233 gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
40234 }
40235 }
40236
40237 initMemWrite<3>(gpuDynInst);
40238 } // initiateAcc
40239
40240 void
40241 Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
40242 {
40243 } // completeAcc
40244
40245 Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4(
40246 InFmt_FLAT *iFmt)
40247 : Inst_FLAT(iFmt, "flat_store_dwordx4")
40248 {
40249 setFlag(MemoryRef);
40250 setFlag(Store);
40251 } // Inst_FLAT__FLAT_STORE_DWORDX4
40252
40253 Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4()
40254 {
40255 } // ~Inst_FLAT__FLAT_STORE_DWORDX4
40256
40257 // Untyped buffer store 4 dwords.
40258 void
40259 Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
40260 {
40261 Wavefront *wf = gpuDynInst->wavefront();
40262
40263 if (wf->execMask().none()) {
40264 wf->decVMemInstsIssued();
40265 wf->decLGKMInstsIssued();
40266 wf->wrGmReqsInPipe--;
40267 wf->wrLmReqsInPipe--;
40268 return;
40269 }
40270
40271 gpuDynInst->execUnitId = wf->execUnitId;
40272 gpuDynInst->exec_mask = wf->execMask();
40273 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40274 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40275
40276 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40277
40278 addr.read();
40279
40280 calcAddr(gpuDynInst, addr);
40281
40282 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40283 gpuDynInst->computeUnit()->globalMemoryPipe
40284 .issueRequest(gpuDynInst);
40285 wf->wrGmReqsInPipe--;
40286 wf->outstandingReqsWrGm++;
40287 } else {
40288 fatal("Non global flat instructions not implemented yet.\n");
40289 }
40290
40291 gpuDynInst->wavefront()->outstandingReqs++;
40292 gpuDynInst->wavefront()->validateRequestCounters();
40293 }
40294
40295 void
40296 Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
40297 {
40298 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
40299 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
40300 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
40301 ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);
40302
40303 data0.read();
40304 data1.read();
40305 data2.read();
40306 data3.read();
40307
40308 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40309 if (gpuDynInst->exec_mask[lane]) {
40310 (reinterpret_cast<VecElemU32*>(
40311 gpuDynInst->d_data))[lane * 4] = data0[lane];
40312 (reinterpret_cast<VecElemU32*>(
40313 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
40314 (reinterpret_cast<VecElemU32*>(
40315 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
40316 (reinterpret_cast<VecElemU32*>(
40317 gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
40318 }
40319 }
40320
40321 initMemWrite<4>(gpuDynInst);
40322 } // initiateAcc
40323
40324 void
40325 Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
40326 {
40327 } // completeAcc
40328
40329 Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt)
40330 : Inst_FLAT(iFmt, "flat_atomic_swap")
40331 {
40332 setFlag(AtomicExch);
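                  // The GLC bit selects the return behavior: glc=1 makes
                  // this a returning atomic whose pre-op memory value is
                  // written to VDST in completeAcc(), glc=0 discards the
                  // old value. E.g.,
                  //   flat_atomic_swap v0, v[2:3], v4 glc
                  // returns the previous value of MEM[v[2:3]] in v0.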
40333 if (instData.GLC) {
40334 setFlag(AtomicReturn);
40335 } else {
40336 setFlag(AtomicNoReturn);
40337 } // if
40338 setFlag(MemoryRef);
40339 } // Inst_FLAT__FLAT_ATOMIC_SWAP
40340
40341 Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP()
40342 {
40343 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP
40344
40345 // tmp = MEM[ADDR];
40346 // MEM[ADDR] = DATA;
40347 // RETURN_DATA = tmp.
40348 void
40349 Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
40350 {
40351 Wavefront *wf = gpuDynInst->wavefront();
40352
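              // An atomic is a read-modify-write, so it was charged against
              // both the read and write global-memory counters at issue
              // time; undo both when every lane is inactive.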
40353 if (wf->execMask().none()) {
40354 wf->decVMemInstsIssued();
40355 wf->decLGKMInstsIssued();
40356 wf->wrGmReqsInPipe--;
40357 wf->rdGmReqsInPipe--;
40358 return;
40359 }
40360
40361 gpuDynInst->execUnitId = wf->execUnitId;
40362 gpuDynInst->exec_mask = wf->execMask();
40363 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40364 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40365
40366 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40367
40368 addr.read();
40369
40370 calcAddr(gpuDynInst, addr);
40371
40372 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
40373 gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
40374 // TODO: additional address computation required for scratch
40375 panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
40376 "Flats to private aperture not tested yet\n");
40377 gpuDynInst->computeUnit()->globalMemoryPipe.
40378 issueRequest(gpuDynInst);
40379 wf->wrGmReqsInPipe--;
40380 wf->outstandingReqsWrGm++;
40381 wf->rdGmReqsInPipe--;
40382 wf->outstandingReqsRdGm++;
40383 } else {
40384 fatal("Non global flat instructions not implemented yet.\n");
40385 }
40386
40387 gpuDynInst->wavefront()->outstandingReqs++;
40388 gpuDynInst->wavefront()->validateRequestCounters();
40389
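              // Stage each active lane's swap value in a_data. The request
              // issued above is only queued at this point, so the staging
              // still precedes the actual atomic access.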
40390 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40391
40392 data.read();
40393
40394 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40395 if (gpuDynInst->exec_mask[lane]) {
40396 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40397 = data[lane];
40398 }
40399 }
40401 } // execute
40402
40403 void
40404 Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
40405 {
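              // Issue one 32-bit atomic per active lane; the memory system
              // applies the operation selected by the atomic flags set in
              // the constructor (AtomicExch here) to the value staged in
              // a_data.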
40406 initAtomicAccess<VecElemU32>(gpuDynInst);
40407 } // initiateAcc
40408
40409 void
40410 Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst)
40411 {
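              // Only returning atomics (glc=1) write anything back: the
              // pre-op memory value arrives in d_data and is committed to
              // VDST.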
40412 if (isAtomicRet()) {
40413 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40414
40415 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40416 if (gpuDynInst->exec_mask[lane]) {
40417 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40418 gpuDynInst->d_data))[lane];
40419 }
40420 }
40421
40422 vdst.write();
40423 }
40424 } // completeAcc
40425
40426 // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---
40427
40428 Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40429 ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
40430 : Inst_FLAT(iFmt, "flat_atomic_cmpswap")
40431 {
40432 setFlag(AtomicCAS);
40433 if (instData.GLC) {
40434 setFlag(AtomicReturn);
40435 } else {
40436 setFlag(AtomicNoReturn);
40437 } // if
40438 setFlag(MemoryRef);
40439 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40440
40441 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP()
40442 {
40443 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40444
40445 // tmp = MEM[ADDR];
40446 // src = DATA[0];
40447 // cmp = DATA[1];
40448 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
40449 // RETURN_DATA[0] = tmp.
40450 void
40451 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
40452 {
40453 Wavefront *wf = gpuDynInst->wavefront();
40454
40455 if (wf->execMask().none()) {
40456 wf->decVMemInstsIssued();
40457 wf->decLGKMInstsIssued();
40458 wf->wrGmReqsInPipe--;
40459 wf->rdGmReqsInPipe--;
40460 return;
40461 }
40462
40463 gpuDynInst->execUnitId = wf->execUnitId;
40464 gpuDynInst->exec_mask = wf->execMask();
40465 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40466 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40467
40468 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40469 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40470 ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);
40471
40472 addr.read();
40473 data.read();
40474 cmp.read();
40475
40476 calcAddr(gpuDynInst, addr);
40477
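              // Pack the CAS operands: x_data holds each lane's swap value
              // (DATA[0]) and a_data its compare value (DATA[1]). Per lane,
              // the memory system then performs
              //   mem = (mem == a_data) ? x_data : mem;
              // and returns the original value of mem.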
40478 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40479 if (gpuDynInst->exec_mask[lane]) {
40480 (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
40481 = data[lane];
40482 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40483 = cmp[lane];
40484 }
40485 }
40486
40487 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
40488 gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
40489             /**
40490              * TODO: If you encounter this panic, simply remove it and
40491              * restart the simulation. It should work fine; the panic
40492              * only warns the user that this path has never been
40493              * tested, although all the necessary logic is implemented.
40494              */
40495 panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
40496 "Flats to private aperture not tested yet\n");
40497 gpuDynInst->computeUnit()->globalMemoryPipe.
40498 issueRequest(gpuDynInst);
40499 wf->wrGmReqsInPipe--;
40500 wf->outstandingReqsWrGm++;
40501 wf->rdGmReqsInPipe--;
40502 wf->outstandingReqsRdGm++;
40503 } else {
40504 fatal("Non global flat instructions not implemented yet.\n");
40505 }
40506
40507 gpuDynInst->wavefront()->outstandingReqs++;
40508 gpuDynInst->wavefront()->validateRequestCounters();
40509 }
40510
40511 void
40512 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
40513 {
40514 initAtomicAccess<VecElemU32>(gpuDynInst);
40515 } // initiateAcc
40516
40517 void
40518 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
40519 {
40520 if (isAtomicRet()) {
40521 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40522
40523 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40524 if (gpuDynInst->exec_mask[lane]) {
40525 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40526 gpuDynInst->d_data))[lane];
40527 }
40528 }
40529
40530 vdst.write();
40531 }
40532 } // completeAcc
40533
40534 Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt)
40535 : Inst_FLAT(iFmt, "flat_atomic_add")
40536 {
40537 setFlag(AtomicAdd);
40538 if (instData.GLC) {
40539 setFlag(AtomicReturn);
40540 } else {
40541 setFlag(AtomicNoReturn);
40542 } // if
40543 setFlag(MemoryRef);
40544 } // Inst_FLAT__FLAT_ATOMIC_ADD
40545
40546 Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD()
40547 {
40548 } // ~Inst_FLAT__FLAT_ATOMIC_ADD
40549
40550 // tmp = MEM[ADDR];
40551 // MEM[ADDR] += DATA;
40552 // RETURN_DATA = tmp.
40553 void
40554 Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
40555 {
40556 Wavefront *wf = gpuDynInst->wavefront();
40557
40558 if (wf->execMask().none()) {
40559 wf->decVMemInstsIssued();
40560 wf->decLGKMInstsIssued();
40561 wf->wrGmReqsInPipe--;
40562 wf->rdGmReqsInPipe--;
40563 return;
40564 }
40565
40566 gpuDynInst->execUnitId = wf->execUnitId;
40567 gpuDynInst->exec_mask = wf->execMask();
40568 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40569 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40570
40571 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40572 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40573
40574 addr.read();
40575 data.read();
40576
40577 calcAddr(gpuDynInst, addr);
40578
40579 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40580 if (gpuDynInst->exec_mask[lane]) {
40581 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40582 = data[lane];
40583 }
40584 }
40585
40586 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40587 gpuDynInst->computeUnit()->globalMemoryPipe.
40588 issueRequest(gpuDynInst);
40589 wf->wrGmReqsInPipe--;
40590 wf->outstandingReqsWrGm++;
40591 wf->rdGmReqsInPipe--;
40592 wf->outstandingReqsRdGm++;
40593 } else {
40594 fatal("Non global flat instructions not implemented yet.\n");
40595 }
40596
40597 gpuDynInst->wavefront()->outstandingReqs++;
40598 gpuDynInst->wavefront()->validateRequestCounters();
40599 }
40600
40601 void
40602 Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
40603 {
40604 initAtomicAccess<VecElemU32>(gpuDynInst);
40605 } // initiateAcc
40606
40607 void
40608 Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst)
40609 {
40610 if (isAtomicRet()) {
40611 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40612
40613 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40614 if (gpuDynInst->exec_mask[lane]) {
40615 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40616 gpuDynInst->d_data))[lane];
40617 }
40618 }
40619
40620 vdst.write();
40621 }
40622 } // completeAcc
40623
40624 Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt)
40625 : Inst_FLAT(iFmt, "flat_atomic_sub")
40626 {
40627 setFlag(AtomicSub);
40628 if (instData.GLC) {
40629 setFlag(AtomicReturn);
40630 } else {
40631 setFlag(AtomicNoReturn);
40632 } // if
40633 setFlag(MemoryRef);
40634 } // Inst_FLAT__FLAT_ATOMIC_SUB
40635
40636 Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB()
40637 {
40638 } // ~Inst_FLAT__FLAT_ATOMIC_SUB
40639
40640 // tmp = MEM[ADDR];
40641 // MEM[ADDR] -= DATA;
40642 // RETURN_DATA = tmp.
40643 void
40644 Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
40645 {
40646 Wavefront *wf = gpuDynInst->wavefront();
40647
40648 if (wf->execMask().none()) {
40649 wf->decVMemInstsIssued();
40650 wf->decLGKMInstsIssued();
40651 wf->wrGmReqsInPipe--;
40652 wf->rdGmReqsInPipe--;
40653 return;
40654 }
40655
40656 gpuDynInst->execUnitId = wf->execUnitId;
40657 gpuDynInst->exec_mask = wf->execMask();
40658 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40659 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40660
40661 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40662 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40663
40664 addr.read();
40665 data.read();
40666
40667 calcAddr(gpuDynInst, addr);
40668
40669 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40670 if (gpuDynInst->exec_mask[lane]) {
40671 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40672 = data[lane];
40673 }
40674 }
40675
40676 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40677 gpuDynInst->computeUnit()->globalMemoryPipe.
40678 issueRequest(gpuDynInst);
40679 wf->wrGmReqsInPipe--;
40680 wf->outstandingReqsWrGm++;
40681 wf->rdGmReqsInPipe--;
40682 wf->outstandingReqsRdGm++;
40683 } else {
40684 fatal("Non global flat instructions not implemented yet.\n");
40685 }
40686
40687 gpuDynInst->wavefront()->outstandingReqs++;
40688 gpuDynInst->wavefront()->validateRequestCounters();
40689 }

40690         void
40691 Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst)
40692 {
40693 initAtomicAccess<VecElemU32>(gpuDynInst);
40694 } // initiateAcc
40695
40696 void
40697 Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst)
40698 {
40699 if (isAtomicRet()) {
40700 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40701
40702 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40703 if (gpuDynInst->exec_mask[lane]) {
40704 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40705 gpuDynInst->d_data))[lane];
40706 }
40707 }
40708
40709 vdst.write();
40710 }
40711 } // completeAcc
40712
40713 Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt)
40714 : Inst_FLAT(iFmt, "flat_atomic_smin")
40715 {
40716 setFlag(AtomicMin);
40717 if (instData.GLC) {
40718 setFlag(AtomicReturn);
40719 } else {
40720 setFlag(AtomicNoReturn);
40721 }
40722 setFlag(MemoryRef);
40723 } // Inst_FLAT__FLAT_ATOMIC_SMIN
40724
40725 Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN()
40726 {
40727 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN
40728
40729 // tmp = MEM[ADDR];
40730 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
40731 // RETURN_DATA = tmp.
40732 void
40733 Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
40734 {
40735 panicUnimplemented();
40736 }
40737
40738 Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
40739 : Inst_FLAT(iFmt, "flat_atomic_umin")
40740 {
40741 setFlag(AtomicMin);
40742 if (instData.GLC) {
40743 setFlag(AtomicReturn);
40744 } else {
40745 setFlag(AtomicNoReturn);
40746 }
40747 setFlag(MemoryRef);
40748 } // Inst_FLAT__FLAT_ATOMIC_UMIN
40749
40750 Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN()
40751 {
40752 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN
40753
40754 // tmp = MEM[ADDR];
40755 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
40756 // RETURN_DATA = tmp.
40757 void
40758 Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
40759 {
40760 panicUnimplemented();
40761 }
40762
40763 Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt)
40764 : Inst_FLAT(iFmt, "flat_atomic_smax")
40765 {
40766 setFlag(AtomicMax);
40767 if (instData.GLC) {
40768 setFlag(AtomicReturn);
40769 } else {
40770 setFlag(AtomicNoReturn);
40771 }
40772 setFlag(MemoryRef);
40773 } // Inst_FLAT__FLAT_ATOMIC_SMAX
40774
40775 Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX()
40776 {
40777 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX
40778
40779 // tmp = MEM[ADDR];
40780 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
40781 // RETURN_DATA = tmp.
40782 void
40783 Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
40784 {
40785 panicUnimplemented();
40786 }
40787
40788 Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt)
40789 : Inst_FLAT(iFmt, "flat_atomic_umax")
40790 {
40791 setFlag(AtomicMax);
40792 if (instData.GLC) {
40793 setFlag(AtomicReturn);
40794 } else {
40795 setFlag(AtomicNoReturn);
40796 }
40797 setFlag(MemoryRef);
40798 } // Inst_FLAT__FLAT_ATOMIC_UMAX
40799
40800 Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX()
40801 {
40802 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX
40803
40804 // tmp = MEM[ADDR];
40805 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
40806 // RETURN_DATA = tmp.
40807 void
40808 Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
40809 {
40810 panicUnimplemented();
40811 }
40812
40813 Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt)
40814 : Inst_FLAT(iFmt, "flat_atomic_and")
40815 {
40816 setFlag(AtomicAnd);
40817 if (instData.GLC) {
40818 setFlag(AtomicReturn);
40819 } else {
40820 setFlag(AtomicNoReturn);
40821 }
40822 setFlag(MemoryRef);
40823 } // Inst_FLAT__FLAT_ATOMIC_AND
40824
40825 Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND()
40826 {
40827 } // ~Inst_FLAT__FLAT_ATOMIC_AND
40828
40829 // tmp = MEM[ADDR];
40830 // MEM[ADDR] &= DATA;
40831 // RETURN_DATA = tmp.
40832 void
40833 Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
40834 {
40835 panicUnimplemented();
40836 }
40837
40838 Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt)
40839 : Inst_FLAT(iFmt, "flat_atomic_or")
40840 {
40841 setFlag(AtomicOr);
40842 if (instData.GLC) {
40843 setFlag(AtomicReturn);
40844 } else {
40845 setFlag(AtomicNoReturn);
40846 }
40847 setFlag(MemoryRef);
40848 } // Inst_FLAT__FLAT_ATOMIC_OR
40849
40850 Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR()
40851 {
40852 } // ~Inst_FLAT__FLAT_ATOMIC_OR
40853
40854 // tmp = MEM[ADDR];
40855 // MEM[ADDR] |= DATA;
40856 // RETURN_DATA = tmp.
40857 void
40858 Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
40859 {
40860 panicUnimplemented();
40861 }
40862
40863 Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt)
40864 : Inst_FLAT(iFmt, "flat_atomic_xor")
40865 {
40866 setFlag(AtomicXor);
40867 if (instData.GLC) {
40868 setFlag(AtomicReturn);
40869 } else {
40870 setFlag(AtomicNoReturn);
40871 }
40872 setFlag(MemoryRef);
40873 } // Inst_FLAT__FLAT_ATOMIC_XOR
40874
40875 Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR()
40876 {
40877 } // ~Inst_FLAT__FLAT_ATOMIC_XOR
40878
40879 // tmp = MEM[ADDR];
40880 // MEM[ADDR] ^= DATA;
40881 // RETURN_DATA = tmp.
40882 void
40883 Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
40884 {
40885 panicUnimplemented();
40886 }
40887
40888 Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt)
40889 : Inst_FLAT(iFmt, "flat_atomic_inc")
40890 {
40891 setFlag(AtomicInc);
40892 if (instData.GLC) {
40893 setFlag(AtomicReturn);
40894 } else {
40895 setFlag(AtomicNoReturn);
40896 }
40897 setFlag(MemoryRef);
40898 } // Inst_FLAT__FLAT_ATOMIC_INC
40899
40900 Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC()
40901 {
40902 } // ~Inst_FLAT__FLAT_ATOMIC_INC
40903
40904 // tmp = MEM[ADDR];
40905 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
40906 // RETURN_DATA = tmp.
40907 void
40908 Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
40909 {
40910 Wavefront *wf = gpuDynInst->wavefront();
40911
40912 if (wf->execMask().none()) {
40913 wf->decVMemInstsIssued();
40914 wf->decLGKMInstsIssued();
40915 wf->wrGmReqsInPipe--;
40916 wf->rdGmReqsInPipe--;
40917 return;
40918 }
40919
40920 gpuDynInst->execUnitId = wf->execUnitId;
40921 gpuDynInst->exec_mask = wf->execMask();
40922 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40923 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40924
40925 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40926 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40927
40928 addr.read();
40929 data.read();
40930
40931 calcAddr(gpuDynInst, addr);
40932
40933 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40934 if (gpuDynInst->exec_mask[lane]) {
40935 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40936 = data[lane];
40937 }
40938 }
40939
40940 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40941 gpuDynInst->computeUnit()->globalMemoryPipe.
40942 issueRequest(gpuDynInst);
40943 wf->wrGmReqsInPipe--;
40944 wf->outstandingReqsWrGm++;
40945 wf->rdGmReqsInPipe--;
40946 wf->outstandingReqsRdGm++;
40947 } else {
40948 fatal("Non global flat instructions not implemented yet.\n");
40949 }
40950
40951 gpuDynInst->wavefront()->outstandingReqs++;
40952 gpuDynInst->wavefront()->validateRequestCounters();
40953 }
40954
40955 void
40956 Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst)
40957 {
40958 initAtomicAccess<VecElemU32>(gpuDynInst);
40959 } // initiateAcc
40960
40961 void
40962 Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst)
40963 {
40964 if (isAtomicRet()) {
40965 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40966
40967 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40968 if (gpuDynInst->exec_mask[lane]) {
40969 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40970 gpuDynInst->d_data))[lane];
40971 }
40972 }
40973
40974 vdst.write();
40975 }
40976 } // completeAcc
40977
40978 Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt)
40979 : Inst_FLAT(iFmt, "flat_atomic_dec")
40980 {
40981 setFlag(AtomicDec);
40982 if (instData.GLC) {
40983 setFlag(AtomicReturn);
40984 } else {
40985 setFlag(AtomicNoReturn);
40986 }
40987 setFlag(MemoryRef);
40988 } // Inst_FLAT__FLAT_ATOMIC_DEC
40989
40990 Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC()
40991 {
40992 } // ~Inst_FLAT__FLAT_ATOMIC_DEC
40993
40994 // tmp = MEM[ADDR];
40995 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
40996         // (unsigned compare);
              // RETURN_DATA = tmp.
40997 void
40998 Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
40999 {
41000 Wavefront *wf = gpuDynInst->wavefront();
41001
41002 if (wf->execMask().none()) {
41003 wf->decVMemInstsIssued();
41004 wf->decLGKMInstsIssued();
41005 wf->wrGmReqsInPipe--;
41006 wf->rdGmReqsInPipe--;
41007 return;
41008 }
41009
41010 gpuDynInst->execUnitId = wf->execUnitId;
41011 gpuDynInst->exec_mask = wf->execMask();
41012 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41013 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41014
41015 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41016 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
41017
41018 addr.read();
41019 data.read();
41020
41021 calcAddr(gpuDynInst, addr);
41022
41023 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41024 if (gpuDynInst->exec_mask[lane]) {
41025 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
41026 = data[lane];
41027 }
41028 }
41029
41030 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41031 gpuDynInst->computeUnit()->globalMemoryPipe.
41032 issueRequest(gpuDynInst);
41033 wf->wrGmReqsInPipe--;
41034 wf->outstandingReqsWrGm++;
41035 wf->rdGmReqsInPipe--;
41036 wf->outstandingReqsRdGm++;
41037 } else {
41038 fatal("Non global flat instructions not implemented yet.\n");
41039 }
41040
41041 gpuDynInst->wavefront()->outstandingReqs++;
41042 gpuDynInst->wavefront()->validateRequestCounters();
41043 }
41044
41045 void
41046 Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst)
41047 {
41048 initAtomicAccess<VecElemU32>(gpuDynInst);
41049 } // initiateAcc
41050
41051 void
41052 Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst)
41053 {
41054 if (isAtomicRet()) {
41055 VecOperandU32 vdst(gpuDynInst, extData.VDST);
41056
41057 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41058 if (gpuDynInst->exec_mask[lane]) {
41059 vdst[lane] = (reinterpret_cast<VecElemU32*>(
41060 gpuDynInst->d_data))[lane];
41061 }
41062 }
41063
41064 vdst.write();
41065 }
41066 } // completeAcc
41067
41068 Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2(
41069 InFmt_FLAT *iFmt)
41070 : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
41071 {
41072 setFlag(AtomicExch);
41073 if (instData.GLC) {
41074 setFlag(AtomicReturn);
41075 } else {
41076 setFlag(AtomicNoReturn);
41077 }
41078 setFlag(MemoryRef);
41079 } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2
41080
41081 Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2()
41082 {
41083 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2
41084
41085 // tmp = MEM[ADDR];
41086 // MEM[ADDR] = DATA[0:1];
41087 // RETURN_DATA[0:1] = tmp.
41088 void
41089 Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
41090 {
41091 panicUnimplemented();
41092 }
41093
41094 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(
41095 InFmt_FLAT *iFmt)
41096 : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
41097 {
41098 setFlag(AtomicCAS);
41099 if (instData.GLC) {
41100 setFlag(AtomicReturn);
41101 } else {
41102 setFlag(AtomicNoReturn);
41103 }
41104 setFlag(MemoryRef);
41105 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
41106
41107 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2()
41108 {
41109 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
41110
41111 // tmp = MEM[ADDR];
41112 // src = DATA[0:1];
41113 // cmp = DATA[2:3];
41114 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
41115 // RETURN_DATA[0:1] = tmp.
41116 void
41117 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
41118 {
41119 Wavefront *wf = gpuDynInst->wavefront();
41120
41121 if (wf->execMask().none()) {
41122 wf->decVMemInstsIssued();
41123 wf->decLGKMInstsIssued();
41124 wf->wrGmReqsInPipe--;
41125 wf->rdGmReqsInPipe--;
41126 return;
41127 }
41128
41129 gpuDynInst->execUnitId = wf->execUnitId;
41130 gpuDynInst->exec_mask = wf->execMask();
41131 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41132 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41133
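              // Each 64-bit operand spans two consecutive dwords of the
              // DATA range, so the swap value is DATA[0:1] and the compare
              // value starts two registers up at DATA[2:3] (hence the + 2,
              // versus the + 1 used by the 32-bit cmpswap above).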
41134 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41135 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41136 ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);
41137
41138 addr.read();
41139 data.read();
41140 cmp.read();
41141
41142 calcAddr(gpuDynInst, addr);
41143
41144 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41145 if (gpuDynInst->exec_mask[lane]) {
41146 (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
41147 = data[lane];
41148 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41149 = cmp[lane];
41150 }
41151 }
41152
41153 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
41154 gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
41155             /**
41156              * TODO: If you encounter this panic, simply remove it and
41157              * restart the simulation. It should work fine; the panic
41158              * only warns the user that this path has never been
41159              * tested, although all the necessary logic is implemented.
41160              */
41161 panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
41162 "Flats to private aperture not tested yet\n");
41163 gpuDynInst->computeUnit()->globalMemoryPipe.
41164 issueRequest(gpuDynInst);
41165 wf->wrGmReqsInPipe--;
41166 wf->outstandingReqsWrGm++;
41167 wf->rdGmReqsInPipe--;
41168 wf->outstandingReqsRdGm++;
41169 } else {
41170 fatal("Non global flat instructions not implemented yet.\n");
41171 }
41172
41173 gpuDynInst->wavefront()->outstandingReqs++;
41174 gpuDynInst->wavefront()->validateRequestCounters();
41175 }
41176
41177 void
41178 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41179 {
41180 initAtomicAccess<VecElemU64>(gpuDynInst);
41181 } // initiateAcc
41182
41183 void
41184 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41185 {
41186 if (isAtomicRet()) {
41187 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41188
41189 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41190 if (gpuDynInst->exec_mask[lane]) {
41191 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41192 gpuDynInst->d_data))[lane];
41193 }
41194 }
41195
41196 vdst.write();
41197 }
41198 } // completeAcc
41199
41200 Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2(
41201 InFmt_FLAT *iFmt)
41202 : Inst_FLAT(iFmt, "flat_atomic_add_x2")
41203 {
41204 setFlag(AtomicAdd);
41205 if (instData.GLC) {
41206 setFlag(AtomicReturn);
41207 } else {
41208 setFlag(AtomicNoReturn);
41209 }
41210 setFlag(MemoryRef);
41211 } // Inst_FLAT__FLAT_ATOMIC_ADD_X2
41212
41213 Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2()
41214 {
41215 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2
41216
41217 // tmp = MEM[ADDR];
41218 // MEM[ADDR] += DATA[0:1];
41219 // RETURN_DATA[0:1] = tmp.
41220 void
41221 Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
41222 {
41223 Wavefront *wf = gpuDynInst->wavefront();
41224
41225 if (wf->execMask().none()) {
41226 wf->decVMemInstsIssued();
41227 wf->decLGKMInstsIssued();
41228 wf->wrGmReqsInPipe--;
41229 wf->rdGmReqsInPipe--;
41230 return;
41231 }
41232
41233 gpuDynInst->execUnitId = wf->execUnitId;
41234 gpuDynInst->exec_mask = wf->execMask();
41235 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41236 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41237
41238 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41239 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41240
41241 addr.read();
41242 data.read();
41243
41244 calcAddr(gpuDynInst, addr);
41245
41246 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41247 if (gpuDynInst->exec_mask[lane]) {
41248 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41249 = data[lane];
41250 }
41251 }
41252
41253 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41254 gpuDynInst->computeUnit()->globalMemoryPipe.
41255 issueRequest(gpuDynInst);
41256 wf->wrGmReqsInPipe--;
41257 wf->outstandingReqsWrGm++;
41258 wf->rdGmReqsInPipe--;
41259 wf->outstandingReqsRdGm++;
41260 } else {
41261 fatal("Non global flat instructions not implemented yet.\n");
41262 }
41263
41264 gpuDynInst->wavefront()->outstandingReqs++;
41265 gpuDynInst->wavefront()->validateRequestCounters();
41266 }
41267
41268 void
41269 Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41270 {
41271 initAtomicAccess<VecElemU64>(gpuDynInst);
41272 } // initiateAcc
41273
41274 void
41275 Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41276 {
41277 if (isAtomicRet()) {
41278 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41279
41281 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41282 if (gpuDynInst->exec_mask[lane]) {
41283 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41284 gpuDynInst->d_data))[lane];
41285 }
41286 }
41287
41288 vdst.write();
41289 }
41290 } // completeAcc
41291
41292 Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2(
41293 InFmt_FLAT *iFmt)
41294 : Inst_FLAT(iFmt, "flat_atomic_sub_x2")
41295 {
41296 setFlag(AtomicSub);
41297 if (instData.GLC) {
41298 setFlag(AtomicReturn);
41299 } else {
41300 setFlag(AtomicNoReturn);
41301 }
41302 setFlag(MemoryRef);
41303 } // Inst_FLAT__FLAT_ATOMIC_SUB_X2
41304
41305 Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2()
41306 {
41307 } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2
41308
41309 // tmp = MEM[ADDR];
41310 // MEM[ADDR] -= DATA[0:1];
41311 // RETURN_DATA[0:1] = tmp.
41312 void
41313 Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
41314 {
41315 Wavefront *wf = gpuDynInst->wavefront();
41316
41317 if (wf->execMask().none()) {
41318 wf->decVMemInstsIssued();
41319 wf->decLGKMInstsIssued();
41320 wf->wrGmReqsInPipe--;
41321 wf->rdGmReqsInPipe--;
41322 return;
41323 }
41324
41325 gpuDynInst->execUnitId = wf->execUnitId;
41326 gpuDynInst->exec_mask = wf->execMask();
41327 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41328 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41329
41330 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41331 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41332
41333 addr.read();
41334 data.read();
41335
41336 calcAddr(gpuDynInst, addr);
41337
41338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41339 if (gpuDynInst->exec_mask[lane]) {
41340 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41341 = data[lane];
41342 }
41343 }
41344
41345 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41346 gpuDynInst->computeUnit()->globalMemoryPipe.
41347 issueRequest(gpuDynInst);
41348 wf->wrGmReqsInPipe--;
41349 wf->outstandingReqsWrGm++;
41350 wf->rdGmReqsInPipe--;
41351 wf->outstandingReqsRdGm++;
41352 } else {
41353 fatal("Non global flat instructions not implemented yet.\n");
41354 }
41355
41356 gpuDynInst->wavefront()->outstandingReqs++;
41357 gpuDynInst->wavefront()->validateRequestCounters();
41358 }
41359
41360 void
41361 Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41362 {
41363 initAtomicAccess<VecElemU64>(gpuDynInst);
41364 } // initiateAcc
41365
41366 void
41367 Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41368 {
41369 if (isAtomicRet()) {
41370 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41371
41373 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41374 if (gpuDynInst->exec_mask[lane]) {
41375 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41376 gpuDynInst->d_data))[lane];
41377 }
41378 }
41379
41380 vdst.write();
41381 }
41382 } // completeAcc
41383
41384 Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2(
41385 InFmt_FLAT *iFmt)
41386 : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
41387 {
41388 setFlag(AtomicMin);
41389 if (instData.GLC) {
41390 setFlag(AtomicReturn);
41391 } else {
41392 setFlag(AtomicNoReturn);
41393 }
41394 setFlag(MemoryRef);
41395 } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2
41396
41397 Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2()
41398 {
41399 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2
41400
41401 // tmp = MEM[ADDR];
41402         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
41403 // RETURN_DATA[0:1] = tmp.
41404 void
41405 Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
41406 {
41407 panicUnimplemented();
41408 }
41409
41410 Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2(
41411 InFmt_FLAT *iFmt)
41412 : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
41413 {
41414 setFlag(AtomicMin);
41415 if (instData.GLC) {
41416 setFlag(AtomicReturn);
41417 } else {
41418 setFlag(AtomicNoReturn);
41419 }
41420 setFlag(MemoryRef);
41421 } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2
41422
41423 Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2()
41424 {
41425 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2
41426
41427 // tmp = MEM[ADDR];
41428         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
41429 // RETURN_DATA[0:1] = tmp.
41430 void
41431 Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
41432 {
41433 panicUnimplemented();
41434 }
41435
41436 Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2(
41437 InFmt_FLAT *iFmt)
41438 : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
41439 {
41440 setFlag(AtomicMax);
41441 if (instData.GLC) {
41442 setFlag(AtomicReturn);
41443 } else {
41444 setFlag(AtomicNoReturn);
41445 }
41446 setFlag(MemoryRef);
41447 } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2
41448
41449 Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2()
41450 {
41451 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2
41452
41453 // tmp = MEM[ADDR];
41454         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
41455 // RETURN_DATA[0:1] = tmp.
41456 void
41457 Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
41458 {
41459 panicUnimplemented();
41460 }
41461
41462 Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2(
41463 InFmt_FLAT *iFmt)
41464 : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
41465 {
41466 setFlag(AtomicMax);
41467 if (instData.GLC) {
41468 setFlag(AtomicReturn);
41469 } else {
41470 setFlag(AtomicNoReturn);
41471 }
41472 setFlag(MemoryRef);
41473 } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2
41474
41475 Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2()
41476 {
41477 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2
41478
41479 // tmp = MEM[ADDR];
41480         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
41481 // RETURN_DATA[0:1] = tmp.
41482 void
41483 Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
41484 {
41485 panicUnimplemented();
41486 }
41487
41488 Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2(
41489 InFmt_FLAT *iFmt)
41490 : Inst_FLAT(iFmt, "flat_atomic_and_x2")
41491 {
41492 setFlag(AtomicAnd);
41493 if (instData.GLC) {
41494 setFlag(AtomicReturn);
41495 } else {
41496 setFlag(AtomicNoReturn);
41497 }
41498 setFlag(MemoryRef);
41499 } // Inst_FLAT__FLAT_ATOMIC_AND_X2
41500
41501 Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2()
41502 {
41503 } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2
41504
41505 // tmp = MEM[ADDR];
41506 // MEM[ADDR] &= DATA[0:1];
41507 // RETURN_DATA[0:1] = tmp.
41508 void
41509 Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
41510 {
41511 panicUnimplemented();
41512 }
41513
41514 Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2(
41515 InFmt_FLAT *iFmt)
41516 : Inst_FLAT(iFmt, "flat_atomic_or_x2")
41517 {
41518 setFlag(AtomicOr);
41519 if (instData.GLC) {
41520 setFlag(AtomicReturn);
41521 } else {
41522 setFlag(AtomicNoReturn);
41523 }
41524 setFlag(MemoryRef);
41525 } // Inst_FLAT__FLAT_ATOMIC_OR_X2
41526
41527 Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2()
41528 {
41529 } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2
41530
41531 // tmp = MEM[ADDR];
41532 // MEM[ADDR] |= DATA[0:1];
41533 // RETURN_DATA[0:1] = tmp.
41534 void
41535 Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
41536 {
41537 panicUnimplemented();
41538 }
41539
41540 Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2(
41541 InFmt_FLAT *iFmt)
41542 : Inst_FLAT(iFmt, "flat_atomic_xor_x2")
41543 {
41544 setFlag(AtomicXor);
41545 if (instData.GLC) {
41546 setFlag(AtomicReturn);
41547 } else {
41548 setFlag(AtomicNoReturn);
41549 }
41550 setFlag(MemoryRef);
41551 } // Inst_FLAT__FLAT_ATOMIC_XOR_X2
41552
41553 Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2()
41554 {
41555 } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2
41556
41557 // tmp = MEM[ADDR];
41558 // MEM[ADDR] ^= DATA[0:1];
41559 // RETURN_DATA[0:1] = tmp.
41560 void
41561 Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
41562 {
41563 panicUnimplemented();
41564 }
41565
41566 Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2(
41567 InFmt_FLAT *iFmt)
41568 : Inst_FLAT(iFmt, "flat_atomic_inc_x2")
41569 {
41570 setFlag(AtomicInc);
41571 if (instData.GLC) {
41572 setFlag(AtomicReturn);
41573 } else {
41574 setFlag(AtomicNoReturn);
41575 }
41576 setFlag(MemoryRef);
41577 } // Inst_FLAT__FLAT_ATOMIC_INC_X2
41578
41579 Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2()
41580 {
41581 } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2
41582
41583 // tmp = MEM[ADDR];
41584 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
41585 // RETURN_DATA[0:1] = tmp.
41586 void
41587 Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
41588 {
41589 Wavefront *wf = gpuDynInst->wavefront();
41590
41591 if (wf->execMask().none()) {
41592 wf->decVMemInstsIssued();
41593 wf->decLGKMInstsIssued();
41594 wf->wrGmReqsInPipe--;
41595 wf->rdGmReqsInPipe--;
41596 return;
41597 }
41598
41599 gpuDynInst->execUnitId = wf->execUnitId;
41600 gpuDynInst->exec_mask = wf->execMask();
41601 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41602 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41603
41604 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41605 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41606
41607 addr.read();
41608 data.read();
41609
41610 calcAddr(gpuDynInst, addr);
41611
41612 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41613 if (gpuDynInst->exec_mask[lane]) {
41614 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41615 = data[lane];
41616 }
41617 }
41618
41619 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41620 gpuDynInst->computeUnit()->globalMemoryPipe.
41621 issueRequest(gpuDynInst);
41622 wf->wrGmReqsInPipe--;
41623 wf->outstandingReqsWrGm++;
41624 wf->rdGmReqsInPipe--;
41625 wf->outstandingReqsRdGm++;
41626 } else {
41627 fatal("Non global flat instructions not implemented yet.\n");
41628 }
41629
41630 gpuDynInst->wavefront()->outstandingReqs++;
41631 gpuDynInst->wavefront()->validateRequestCounters();
41632 }
41633
41634 void
41635 Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41636 {
41637 initAtomicAccess<VecElemU64>(gpuDynInst);
41638 } // initiateAcc
41639
41640 void
41641 Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41642 {
41643 if (isAtomicRet()) {
41644 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41645
41647 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41648 if (gpuDynInst->exec_mask[lane]) {
41649 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41650 gpuDynInst->d_data))[lane];
41651 }
41652 }
41653
41654 vdst.write();
41655 }
41656 } // completeAcc
41657
41658 Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2(
41659 InFmt_FLAT *iFmt)
41660 : Inst_FLAT(iFmt, "flat_atomic_dec_x2")
41661 {
41662 setFlag(AtomicDec);
41663 if (instData.GLC) {
41664 setFlag(AtomicReturn);
41665 } else {
41666 setFlag(AtomicNoReturn);
41667 }
41668 setFlag(MemoryRef);
41669 } // Inst_FLAT__FLAT_ATOMIC_DEC_X2
41670
41671 Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2()
41672 {
41673 } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2
41674
41675 // tmp = MEM[ADDR];
41676 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
41677 // (unsigned compare);
41678 // RETURN_DATA[0:1] = tmp.
41679 void
41680 Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
41681 {
41682 Wavefront *wf = gpuDynInst->wavefront();
41683
41684 if (wf->execMask().none()) {
41685 wf->decVMemInstsIssued();
41686 wf->decLGKMInstsIssued();
41687 wf->wrGmReqsInPipe--;
41688 wf->rdGmReqsInPipe--;
41689 return;
41690 }
41691
41692 gpuDynInst->execUnitId = wf->execUnitId;
41693 gpuDynInst->exec_mask = wf->execMask();
41694 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41695 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41696
41697 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41698 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41699
41700 addr.read();
41701 data.read();
41702
41703 calcAddr(gpuDynInst, addr);
41704
41705 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41706 if (gpuDynInst->exec_mask[lane]) {
41707 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41708 = data[lane];
41709 }
41710 }
41711
41712 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41713 gpuDynInst->computeUnit()->globalMemoryPipe.
41714 issueRequest(gpuDynInst);
41715 wf->wrGmReqsInPipe--;
41716 wf->outstandingReqsWrGm++;
41717 wf->rdGmReqsInPipe--;
41718 wf->outstandingReqsRdGm++;
41719 } else {
41720 fatal("Non global flat instructions not implemented yet.\n");
41721 }
41722
41723 gpuDynInst->wavefront()->outstandingReqs++;
41724 gpuDynInst->wavefront()->validateRequestCounters();
41725 }
41726
41727 void
41728 Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41729 {
41730 initAtomicAccess<VecElemU64>(gpuDynInst);
41731 } // initiateAcc
41732
41733 void
41734 Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41735 {
41736 if (isAtomicRet()) {
41737 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41738
41740 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41741 if (gpuDynInst->exec_mask[lane]) {
41742 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41743 gpuDynInst->d_data))[lane];
41744 }
41745 }
41746
41747 vdst.write();
41748 }
41749 } // completeAcc
41750 } // namespace Gcn3ISA