/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Anthony Gutierrez
 */

#include "arch/gcn3/insts/instructions.hh"

#include <cmath>

#include "arch/gcn3/insts/inst_util.hh"
#include "debug/GCN3.hh"
#include "debug/GPUSync.hh"
#include "gpu-compute/shader.hh"

namespace Gcn3ISA
{

    Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_add_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADD_U32

    Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32()
    {
    } // ~Inst_SOP2__S_ADD_U32

    // D.u = S0.u + S1.u;
    // SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned
    // overflow/carry-out.
    void
    Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() + src1.rawData();
        scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
            >= 0x100000000ULL ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
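
    // Worked example for the carry-out above: src0 = 0xFFFFFFFF and
    // src1 = 0x1 wrap sdst to 0x0, while the widened 64-bit sum is
    // 0x100000000, so SCC is set to 1.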

    Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_sub_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUB_U32

    Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32()
    {
    } // ~Inst_SOP2__S_SUB_U32

    // D.u = S0.u - S1.u;
    // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out.
    void
    Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() - src1.rawData();
        scc = (src1.rawData() > src0.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
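
    // Worked example for the borrow above: src0 = 0x0 and src1 = 0x1
    // wrap sdst to 0xFFFFFFFF, and src1 > src0 sets SCC (borrow-out) to 1.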

    Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_add_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADD_I32

    Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32()
    {
    } // ~Inst_SOP2__S_ADD_I32

    // D.i = S0.i + S1.i;
    // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
    // overflow.
    void
    Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() + src1.rawData();
        scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
            && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
            ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
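
    // The SCC rule above is the usual two's-complement overflow test:
    // adding operands of the same sign overflows exactly when the result's
    // sign differs, e.g. 0x7FFFFFFF + 0x1 = 0x80000000 sets SCC = 1.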

    Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_sub_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUB_I32

    Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32()
    {
    } // ~Inst_SOP2__S_SUB_I32

    // D.i = S0.i - S1.i;
    // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
    // overflow.
    void
    Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() - src1.rawData();
        scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
            && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_addc_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADDC_U32

    Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32()
    {
    } // ~Inst_SOP2__S_ADDC_U32

    // D.u = S0.u + S1.u + SCC;
    // SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned
    // carry-out.
    void
    Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = src0.rawData() + src1.rawData() + scc.rawData();
        scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
            + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
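
    // Together with s_add_u32, s_addc_u32 lets software build wider adds:
    // typically s_add_u32 on the low words leaves a carry in SCC, which
    // s_addc_u32 then folds into the add of the high words.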

    Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_subb_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUBB_U32

    Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32()
    {
    } // ~Inst_SOP2__S_SUBB_U32

    // D.u = S0.u - S1.u - SCC;
    // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
    void
    Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = src0.rawData() - src1.rawData() - scc.rawData();
        scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_min_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MIN_I32

    Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32()
    {
    } // ~Inst_SOP2__S_MIN_I32

    // D.i = (S0.i < S1.i) ? S0.i : S1.i;
    // SCC = 1 if S0 is chosen as the minimum value.
    void
    Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::min(src0.rawData(), src1.rawData());
        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_min_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MIN_U32

    Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32()
    {
    } // ~Inst_SOP2__S_MIN_U32

    // D.u = (S0.u < S1.u) ? S0.u : S1.u;
    // SCC = 1 if S0 is chosen as the minimum value.
    void
    Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::min(src0.rawData(), src1.rawData());
        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_max_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MAX_I32

    Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32()
    {
    } // ~Inst_SOP2__S_MAX_I32

    // D.i = (S0.i > S1.i) ? S0.i : S1.i;
    // SCC = 1 if S0 is chosen as the maximum value.
    void
    Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::max(src0.rawData(), src1.rawData());
        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_max_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MAX_U32

    Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32()
    {
    } // ~Inst_SOP2__S_MAX_U32

    // D.u = (S0.u > S1.u) ? S0.u : S1.u;
    // SCC = 1 if S0 is chosen as the maximum value.
    void
    Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::max(src0.rawData(), src1.rawData());
        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cselect_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_CSELECT_B32

    Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32()
    {
    } // ~Inst_SOP2__S_CSELECT_B32

    // D.u = SCC ? S0.u : S1.u (conditional select).
    void
    Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = scc.rawData() ? src0.rawData() : src1.rawData();

        sdst.write();
    } // execute

    Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cselect_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_CSELECT_B64

    Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64()
    {
    } // ~Inst_SOP2__S_CSELECT_B64

    // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
    void
    Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = scc.rawData() ? src0.rawData() : src1.rawData();

        sdst.write();
    } // execute

    Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_and_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_AND_B32

    Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32()
    {
    } // ~Inst_SOP2__S_AND_B32

    // D.u = S0.u & S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_and_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_AND_B64

    Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
    {
    } // ~Inst_SOP2__S_AND_B64

    // D.u64 = S0.u64 & S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
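
    // On GCN3 the 64-bit bitwise ops commonly operate on wave-wide masks;
    // compilers typically use s_and_b64 to combine a condition mask with
    // EXEC when entering divergent control flow.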

    Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B32

    Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
    {
    } // ~Inst_SOP2__S_OR_B32

    // D.u = S0.u | S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B64

    Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
    {
    } // ~Inst_SOP2__S_OR_B64

    // D.u64 = S0.u64 | S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B32

    Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
    {
    } // ~Inst_SOP2__S_XOR_B32

    // D.u = S0.u ^ S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B64

    Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
    {
    } // ~Inst_SOP2__S_XOR_B64

    // D.u64 = S0.u64 ^ S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B32

    Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
    {
    } // ~Inst_SOP2__S_ANDN2_B32

    // D.u = S0.u & ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & ~src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B64

    Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
    {
    } // ~Inst_SOP2__S_ANDN2_B64

    // D.u64 = S0.u64 & ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & ~src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B32

    Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
    {
    } // ~Inst_SOP2__S_ORN2_B32

    // D.u = S0.u | ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | ~src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B64

    Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
    {
    } // ~Inst_SOP2__S_ORN2_B64

    // D.u64 = S0.u64 | ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | ~src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B32

    Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
    {
    } // ~Inst_SOP2__S_NAND_B32

    // D.u = ~(S0.u & S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B64

    Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
    {
    } // ~Inst_SOP2__S_NAND_B64

    // D.u64 = ~(S0.u64 & S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B32

    Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
    {
    } // ~Inst_SOP2__S_NOR_B32

    // D.u = ~(S0.u | S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B64

    Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
    {
    } // ~Inst_SOP2__S_NOR_B64

    // D.u64 = ~(S0.u64 | S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B32

    Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
    {
    } // ~Inst_SOP2__S_XNOR_B32

    // D.u = ~(S0.u ^ S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B64

    Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
    {
    } // ~Inst_SOP2__S_XNOR_B64

    // D.u64 = ~(S0.u64 ^ S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B32

    Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
    {
    } // ~Inst_SOP2__S_LSHL_B32

    // D.u = S0.u << S1.u[4:0];
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
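
    // Note that only S1[4:0] participates, so shift amounts are taken
    // modulo 32; e.g. a shift operand of 33 shifts by 1.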

    Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B64

    Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
    {
    } // ~Inst_SOP2__S_LSHL_B64

    // D.u64 = S0.u64 << S1.u[5:0];
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B32

    Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
    {
    } // ~Inst_SOP2__S_LSHR_B32

    // D.u = S0.u >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B64

    Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
    {
    } // ~Inst_SOP2__S_LSHR_B64

    // D.u64 = S0.u64 >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I32

    Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
    {
    } // ~Inst_SOP2__S_ASHR_I32

    // D.i = signext(S0.i) >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I64

    Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
    {
    } // ~Inst_SOP2__S_ASHR_I64

    // D.i64 = signext(S0.i64) >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B32

    Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
    {
    } // ~Inst_SOP2__S_BFM_B32

    // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
    void
    Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
            << bits(src1.rawData(), 4, 0);

        sdst.write();
    } // execute
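
    // Worked example: src0[4:0] = 4 and src1[4:0] = 8 build the mask
    // ((1 << 4) - 1) << 8 = 0x00000F00, i.e. a 4-bit field at offset 8.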

    Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B64

    Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
    {
    } // ~Inst_SOP2__S_BFM_B64

    // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
    void
    Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
            << bits(src1.rawData(), 5, 0);

        sdst.write();
    } // execute

    Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_mul_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MUL_I32

    Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
    {
    } // ~Inst_SOP2__S_MUL_I32

    // D.i = S0.i * S1.i.
    void
    Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = src0.rawData() * src1.rawData();

        sdst.write();
    } // execute

    Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_U32

    Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
    {
    } // ~Inst_SOP2__S_BFE_U32

    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
    // field width.
    // D.u = (S0.u >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
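
    // Worked example: src1 = (8 << 16) | 4 selects an 8-bit field at
    // offset 4, so src0 = 0x12345678 yields sdst = 0x67 and SCC = 1.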

    Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_I32

    Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
    {
    } // ~Inst_SOP2__S_BFE_I32

    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
    // field width.
    // D.i = (S0.i >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
    // Sign-extend the result;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_u64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_U64

    Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
    {
    } // ~Inst_SOP2__S_BFE_U64

    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
    // field width.
    // D.u64 = (S0.u64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        // Use a 64-bit constant in the mask so fields wider than 31 bits
        // are extracted correctly.
        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
            & ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_i64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_I64

    Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
    {
    } // ~Inst_SOP2__S_BFE_I64

    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
    // field width.
    // D.i64 = (S0.i64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
    // Sign-extend result;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        // Use a 64-bit constant in the mask so fields wider than 31 bits
        // are extracted correctly.
        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
            & ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cbranch_g_fork")
    {
        setFlag(Branch);
    } // Inst_SOP2__S_CBRANCH_G_FORK

    Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
    {
    } // ~Inst_SOP2__S_CBRANCH_G_FORK

    // Conditional branch using branch-stack.
    // S0 = compare mask (VCC or any SGPR) and
    // S1 = 64-bit byte address of target instruction.
    void
    Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_absdiff_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ABSDIFF_I32

    Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
    {
    } // ~Inst_SOP2__S_ABSDIFF_I32

    // D.i = S0.i - S1.i;
    // if (D.i < 0) then D.i = -D.i;
    // SCC = 1 if result is non-zero.
    // Compute the absolute value of the difference between two values.
    void
    Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::abs(src0.rawData() - src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
          InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_rfe_restore_b64")
    {
    } // Inst_SOP2__S_RFE_RESTORE_B64

    Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
    {
    } // ~Inst_SOP2__S_RFE_RESTORE_B64

    // Return from exception handler and continue.
    void
    Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_movk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_MOVK_I32

    Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32()
    {
    } // ~Inst_SOPK__S_MOVK_I32

    // D.i = signext(SIMM16) (sign extension).
    void
    Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        sdst = simm16;

        sdst.write();
    } // execute

    Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmovk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMOVK_I32

    Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32()
    {
    } // ~Inst_SOPK__S_CMOVK_I32

    // if (SCC) then D.i = signext(SIMM16);
    // else NOP.
    // Conditional move with sign extension.
    void
    Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (scc.rawData()) {
            sdst = simm16;
            sdst.write();
        }
    } // execute

    Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_eq_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_EQ_I32

    Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32()
    {
    } // ~Inst_SOPK__S_CMPK_EQ_I32

    // SCC = (S0.i == signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() == simm16) ? 1 : 0;

        scc.write();
    } // execute
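
    // For the whole s_cmpk_* family the SDST field of the encoding names
    // the source register being compared; these instructions write only
    // SCC.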

    Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lg_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LG_I32

    Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LG_I32

    // SCC = (S0.i != signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() != simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_gt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GT_I32

    Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32()
    {
    } // ~Inst_SOPK__S_CMPK_GT_I32

    // SCC = (S0.i > signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() > simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_ge_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GE_I32

    Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32()
    {
    } // ~Inst_SOPK__S_CMPK_GE_I32

    // SCC = (S0.i >= signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() >= simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LT_I32

    Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LT_I32

    // SCC = (S0.i < signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_I32

    Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_I32

    // SCC = (S0.i <= signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_eq_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_EQ_U32

    Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
    {
    } // ~Inst_SOPK__S_CMPK_EQ_U32

    // SCC = (S0.u == SIMM16).
    void
    Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() == simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lg_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LG_U32

    Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LG_U32

    // SCC = (S0.u != SIMM16).
    void
    Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() != simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_gt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GT_U32

    Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GT_U32

    // SCC = (S0.u > SIMM16).
    void
    Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() > simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_ge_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GE_U32

    Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GE_U32

    // SCC = (S0.u >= SIMM16).
    void
    Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() >= simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LT_U32

    Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LT_U32

    // SCC = (S0.u < SIMM16).
    void
    Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_U32

    Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_U32

    // SCC = (S0.u <= SIMM16).
    void
    Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_addk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_ADDK_I32

    Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
    {
    } // ~Inst_SOPK__S_ADDK_I32

    // D.i = D.i + signext(SIMM16);
    // SCC = overflow.
    void
    Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = src.rawData() + (ScalarRegI32)simm16;
        scc = (bits(src.rawData(), 31) == bits(simm16, 15)
            && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
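
    // The overflow test above compares bit 15 of the immediate (its sign
    // bit before extension) with bit 31 of the source, mirroring the
    // s_add_i32 rule.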

    Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_mulk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_MULK_I32

    Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
    {
    } // ~Inst_SOPK__S_MULK_I32

    // D.i = D.i * signext(SIMM16).
    void
    Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        sdst.read();

        sdst = sdst.rawData() * (ScalarRegI32)simm16;

        sdst.write();
    } // execute

    Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cbranch_i_fork")
    {
        setFlag(Branch);
    } // Inst_SOPK__S_CBRANCH_I_FORK

    Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
    {
    } // ~Inst_SOPK__S_CBRANCH_I_FORK

    // Conditional branch using branch-stack.
    // S0 = compare mask (VCC or any SGPR), and
    // SIMM16 = signed DWORD branch offset relative to next instruction.
    void
    Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_getreg_b32")
    {
    } // Inst_SOPK__S_GETREG_B32

    Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
    {
    } // ~Inst_SOPK__S_GETREG_B32

    // D.u = hardware-reg. Read some or all of a hardware register into the
    // LSBs of D.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
    void
    Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_setreg_b32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_SETREG_B32

    Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
    {
    } // ~Inst_SOPK__S_SETREG_B32

    // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
    // register.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
    void
    Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarRegU32 hwregId = simm16 & 0x3f;
        ScalarRegU32 offset = (simm16 >> 6) & 31;
        ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;

        ScalarOperandU32 hwreg(gpuDynInst, hwregId);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        hwreg.read();
        sdst.read();

        // Store value from SDST to part of the hardware register.
        ScalarRegU32 mask = (((1U << size) - 1U) << offset);
        hwreg = ((hwreg.rawData() & ~mask)
                    | ((sdst.rawData() << offset) & mask));
        hwreg.write();

        // Set the MODE register to control the behavior of single-precision
        // floating-point numbers: denormal mode or round mode.
        if (hwregId == 1 && size == 2 && (offset == 4 || offset == 0)) {
            warn_once("Be cautious that s_setreg_b32 has no real effect "
                      "on FP modes: %s\n", gpuDynInst->disassemble());
            return;
        }

        // Panic if not changing the MODE of floating-point numbers.
        panicUnimplemented();
    } // execute
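
    // Worked example of the SIMM16 decode above: simm16 = 0x1081 gives
    // hwregId = 1, offset = 2, size = 3, i.e. a 3-bit field at bit 2 of
    // hardware register 1.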

    Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
          InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_setreg_imm32_b32")
    {
    } // Inst_SOPK__S_SETREG_IMM32_B32

    Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
    {
    } // ~Inst_SOPK__S_SETREG_IMM32_B32

    // Write some or all of the LSBs of IMM32 into a hardware register; this
    // instruction requires a 32-bit literal constant.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
    void
    Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_B32

    Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
    {
    } // ~Inst_SOP1__S_MOV_B32

    // D.u = S0.u.
    void
    Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute

    Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_B64

    Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
    {
    } // ~Inst_SOP1__S_MOV_B64

    // D.u64 = S0.u64.
    void
    Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute

    Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B32

    Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
    {
    } // ~Inst_SOP1__S_CMOV_B32

    // if (SCC) then D.u = S0.u;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    } // execute

    Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B64

    Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
    {
    } // ~Inst_SOP1__S_CMOV_B64

    // if (SCC) then D.u64 = S0.u64;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    } // execute

    Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B32

    Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
    {
    } // ~Inst_SOP1__S_NOT_B32

    // D.u = ~S0.u;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B64

    Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
    {
    } // ~Inst_SOP1__S_NOT_B64

    // D.u64 = ~S0.u64;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B32

    Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
    {
    } // ~Inst_SOP1__S_WQM_B32

    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
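
    // Whole quad mode: each group of four mask bits becomes 0xF if any
    // bit in the group was set, e.g. 0x00000001 -> 0x0000000F.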

    Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B64

    Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
    {
    } // ~Inst_SOP1__S_WQM_B64

    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B32

    Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
    {
    } // ~Inst_SOP1__S_BREV_B32

    // D.u[31:0] = S0.u[0:31] (reverse bits).
    void
    Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    } // execute
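
    // Bit reversal maps bit i to bit 31 - i, e.g. 0x00000001 ->
    // 0x80000000.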

    Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B64

    Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
    {
    } // ~Inst_SOP1__S_BREV_B64

    // D.u64[63:0] = S0.u64[0:63] (reverse bits).
    void
    Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    } // execute

    Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt0_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT0_I32_B32

    Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
    {
    } // ~Inst_SOP1__S_BCNT0_I32_B32

    // D.i = CountZeroBits(S0.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = countZeroBits(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt0_i32_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT0_I32_B64

    Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
    {
    } // ~Inst_SOP1__S_BCNT0_I32_B64

    // D.i = CountZeroBits(S0.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = countZeroBits(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt1_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT1_I32_B32

    Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
    {
    } // ~Inst_SOP1__S_BCNT1_I32_B32

    // D.i = CountOneBits(S0.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = popCount(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bcnt1_i32_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BCNT1_I32_B64

    Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
    {
    } // ~Inst_SOP1__S_BCNT1_I32_B64

    // D.i = CountOneBits(S0.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = popCount(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_ff0_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FF0_I32_B32

    Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
    {
    } // ~Inst_SOP1__S_FF0_I32_B32

    // D.i = FindFirstZero(S0.u);
    // If no zeros are found, return -1.
    // Returns the bit position of the first zero from the LSB.
    void
    Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = findFirstZero(src.rawData());

        sdst.write();
    } // execute

    Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_ff0_i32_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FF0_I32_B64

    Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
    {
    } // ~Inst_SOP1__S_FF0_I32_B64

    // D.i = FindFirstZero(S0.u64);
    // If no zeros are found, return -1.
    // Returns the bit position of the first zero from the LSB.
    void
    Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = findFirstZero(src.rawData());

        sdst.write();
    } // execute

    Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_ff1_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FF1_I32_B32

    Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
    {
    } // ~Inst_SOP1__S_FF1_I32_B32

    // D.i = FindFirstOne(S0.u);
    // If no ones are found, return -1.
    // Returns the bit position of the first one from the LSB.
    void
    Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = findFirstOne(src.rawData());

        sdst.write();
    } // execute

    Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_ff1_i32_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FF1_I32_B64

    Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
    {
    } // ~Inst_SOP1__S_FF1_I32_B64

    // D.i = FindFirstOne(S0.u64);
    // If no ones are found, return -1.
    // Returns the bit position of the first one from the LSB.
    void
    Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = findFirstOne(src.rawData());

        sdst.write();
    } // execute

    Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FLBIT_I32_B32

    Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_B32

    // D.i = FindFirstOne(S0.u);
    // If no ones are found, return -1.
    // Counts how many zeros before the first one starting from the MSB.
    void
    Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = countZeroBitsMsb(src.rawData());

        sdst.write();
    } // execute
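
    // countZeroBitsMsb counts leading zeros, e.g. src = 0x0000FFFF gives
    // sdst = 16; src = 0 gives -1 per the comment above.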

    Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FLBIT_I32_B64

    Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_B64

    // D.i = FindFirstOne(S0.u64);
    // If no ones are found, return -1.
    // Counts how many zeros before the first one starting from the MSB.
    void
    Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = countZeroBitsMsb(src.rawData());

        sdst.write();
    } // execute

    Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_FLBIT_I32

    Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
    {
    } // ~Inst_SOP1__S_FLBIT_I32

    // D.i = FirstOppositeSignBit(S0.i);
    // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
    // Counts how many bits in a row (from MSB to LSB) are the same as the
    // sign bit.
    void
    Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = firstOppositeSignBit(src.rawData());

        sdst.write();
    } // execute
2430 Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1
*iFmt
)
2431 : Inst_SOP1(iFmt
, "s_flbit_i32_i64")
2434 } // Inst_SOP1__S_FLBIT_I32_I64
2436 Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
2438 } // ~Inst_SOP1__S_FLBIT_I32_I64
2440 // D.i = FirstOppositeSignBit(S0.i64);
2441 // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
2442 // Counts how many bits in a row (from MSB to LSB) are the same as the
2445 Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst
)
2447 ConstScalarOperandI64
src(gpuDynInst
, instData
.SSRC0
);
2448 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2452 sdst
= firstOppositeSignBit(src
.rawData());
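    // Illustrative example (added; not in the original source): for
    // S0.i = 0x0000CCCC the sign bit is 0 and the sixteen bits below the
    // MSB are also 0, so D.i == 16; for S0.i = 0x7FFFFFFF only the sign
    // bit itself matches, so D.i == 1.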
    Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_sext_i32_i8")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_SEXT_I32_I8

    Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
    {
    } // ~Inst_SOP1__S_SEXT_I32_I8

    // D.i = signext(S0.i[7:0]) (sign extension).
    void
    Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = sext<std::numeric_limits<ScalarRegI8>::digits>(
            bits(src.rawData(), 7, 0));

        sdst.write();
    } // execute

    Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_sext_i32_i16")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_SEXT_I32_I16

    Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
    {
    } // ~Inst_SOP1__S_SEXT_I32_I16

    // D.i = signext(S0.i[15:0]) (sign extension).
    void
    Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = sext<std::numeric_limits<ScalarRegI16>::digits>(
            bits(src.rawData(), 15, 0));

        sdst.write();
    } // execute
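    // Illustrative example (added; not in the original source): sign
    // extension maps 0x80 to 0xFFFFFF80 (-128) and 0x7F to 0x0000007F
    // (+127); the 16-bit variant likewise maps 0x8000 to 0xFFFF8000.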
    Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset0_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BITSET0_B32

    Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
    {
    } // ~Inst_SOP1__S_BITSET0_B32

    // D.u[S0.u[4:0]] = 0.
    void
    Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 4, 0), 0);

        sdst.write();
    } // execute

    Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset0_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BITSET0_B64

    Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
    {
    } // ~Inst_SOP1__S_BITSET0_B64

    // D.u64[S0.u[5:0]] = 0.
    void
    Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 5, 0), 0);

        sdst.write();
    } // execute

    Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset1_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BITSET1_B32

    Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
    {
    } // ~Inst_SOP1__S_BITSET1_B32

    // D.u[S0.u[4:0]] = 1.
    void
    Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 4, 0), 1);

        sdst.write();
    } // execute

    Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset1_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BITSET1_B64

    Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
    {
    } // ~Inst_SOP1__S_BITSET1_B64

    // D.u64[S0.u[5:0]] = 1.
    void
    Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 5, 0), 1);

        sdst.write();
    } // execute
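    // Illustrative example (added; not in the original source): with
    // S0.u = 35, only the low five (B32) or six (B64) bits select the
    // position, so s_bitset0_b32 clears bit 3 (35 & 31) of a 32-bit
    // destination, while s_bitset0_b64 clears bit 35 of a 64-bit one.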
    Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_getpc_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_GETPC_B64

    Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
    {
    } // ~Inst_SOP1__S_GETPC_B64

    // D.u64 = PC + 4.
    // Destination receives the byte address of the next instruction.
    void
    Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        sdst = pc + 4;

        sdst.write();
    } // execute

    Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_setpc_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_SETPC_B64

    Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
    {
    } // ~Inst_SOP1__S_SETPC_B64

    // PC = S0.u64.
    // S0.u64 is a byte address of the instruction to jump to.
    void
    Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);

        src.read();

        wf->pc(src.rawData());
    } // execute

    Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_swappc_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_SWAPPC_B64

    Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
    {
    } // ~Inst_SOP1__S_SWAPPC_B64

    // D.u64 = PC + 4; PC = S0.u64.
    // S0.u64 is a byte address of the instruction to jump to.
    void
    Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = pc + 4;

        wf->pc(src.rawData());
        sdst.write();
    } // execute
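    // Usage note (added; not in the original source): s_swappc_b64 serves
    // as a scalar call primitive -- the PC + 4 saved in SDST is the return
    // address, and a later s_setpc_b64 on that register pair returns to
    // the caller.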
    Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_rfe_b64")
    {
    } // Inst_SOP1__S_RFE_B64

    Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
    {
    } // ~Inst_SOP1__S_RFE_B64

    // Return from exception handler and continue.
    void
    Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_and_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_AND_SAVEEXEC_B64

    Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_AND_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 & EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() & wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
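    // Usage note (added; not in the original source): the *_saveexec family
    // supports structured divergence handling -- the old EXEC mask is parked
    // in SDST, EXEC is narrowed to the lanes taking one side of a branch
    // (here src & EXEC), and SCC lets code skip the region entirely when no
    // lane remains active.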
    Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_or_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_OR_SAVEEXEC_B64

    Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_OR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 | EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() | wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xor_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_XOR_SAVEEXEC_B64

    Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 ^ EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 & ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() & ~wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_ORN2_SAVEEXEC_B64

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 | ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() | ~wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nand_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NAND_SAVEEXEC_B64

    Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 & EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nor_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOR_SAVEEXEC_B64

    Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 | EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_XNOR_SAVEEXEC_B64

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 ^ EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
    Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_QUADMASK_B32

    Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
    {
    } // ~Inst_SOP1__S_QUADMASK_B32

    // D.u = QuadMask(S0.u):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_QUADMASK_B64

    Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
    {
    } // ~Inst_SOP1__S_QUADMASK_B64

    // D.u64 = QuadMask(S0.u64):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute
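    // Illustrative example (added; not in the original source):
    // quadMask(0x00F1) == 0x3 -- quad 0 (bits 3:0 = 0x1) and quad 1
    // (bits 7:4 = 0xF) each OR to 1, while quads 2 and 3 are all zero.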
    Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movrels_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOVRELS_B32

    Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
    {
    } // ~Inst_SOP1__S_MOVRELS_B32

    // D.u = SGPR[S0.u + M0.u].u (move from relative source).
    void
    Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData());
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute

    Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movrels_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOVRELS_B64

    Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64()
    {
    } // ~Inst_SOP1__S_MOVRELS_B64

    // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source).
    // The index in M0.u must be even for this operation.
    void
    Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData());
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute

    Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movreld_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOVRELD_B32

    Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32()
    {
    } // ~Inst_SOP1__S_MOVRELD_B32

    // SGPR[D.u + M0.u].u = S0.u (move to relative destination).
    void
    Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute

    Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movreld_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOVRELD_B64

    Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64()
    {
    } // ~Inst_SOP1__S_MOVRELD_B64

    // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination).
    // The index in M0.u must be even for this operation.
    void
    Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    } // execute
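    // Usage note (added; not in the original source): with M0 = 2, an
    // s_movrels_b32 whose nominal source is s8 actually reads s10, and an
    // s_movreld_b32 whose nominal destination is s8 actually writes s10;
    // together these give software an indexed view of the scalar register
    // file.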
    Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cbranch_join")
    {
        setFlag(Branch);
    } // Inst_SOP1__S_CBRANCH_JOIN

    Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN()
    {
    } // ~Inst_SOP1__S_CBRANCH_JOIN

    // Conditional branch join point (end of conditional branch block).
    void
    Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_abs_i32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_ABS_I32

    Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32()
    {
    } // ~Inst_SOP1__S_ABS_I32

    // if (S.i < 0) then D.i = -S.i;
    // else D.i = S.i;
    // SCC = 1 if result is non-zero.
    // Integer absolute value.
    void
    Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = std::abs(src.rawData());

        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    } // execute

    Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_fed_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_FED_B32

    Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32()
    {
    } // ~Inst_SOP1__S_MOV_FED_B32

    // D.u = S0.u.
    void
    Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX(
        InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_set_gpr_idx_idx")
    {
    } // Inst_SOP1__S_SET_GPR_IDX_IDX

    Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX()
    {
    } // ~Inst_SOP1__S_SET_GPR_IDX_IDX

    // M0[7:0] = S0.u[7:0].
    // Modify the index used in vector GPR indexing.
    void
    Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_i32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_EQ_I32

    Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32()
    {
    } // ~Inst_SOPC__S_CMP_EQ_I32

    // SCC = (S0.i == S1.i).
    void
    Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_i32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LG_I32

    Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32()
    {
    } // ~Inst_SOPC__S_CMP_LG_I32

    // SCC = (S0.i != S1.i).
    void
    Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_gt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_GT_I32

    Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32()
    {
    } // ~Inst_SOPC__S_CMP_GT_I32

    // SCC = (S0.i > S1.i).
    void
    Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_ge_i32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_GE_I32

    Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32()
    {
    } // ~Inst_SOPC__S_CMP_GE_I32

    // SCC = (S0.i >= S1.i).
    void
    Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LT_I32

    Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32()
    {
    } // ~Inst_SOPC__S_CMP_LT_I32

    // SCC = (S0.i < S1.i).
    void
    Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_le_i32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LE_I32

    Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32()
    {
    } // ~Inst_SOPC__S_CMP_LE_I32

    // SCC = (S0.i <= S1.i).
    void
    Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute
    Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_u32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_EQ_U32

    Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32()
    {
    } // ~Inst_SOPC__S_CMP_EQ_U32

    // SCC = (S0.u == S1.u).
    void
    Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_u32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LG_U32

    Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32()
    {
    } // ~Inst_SOPC__S_CMP_LG_U32

    // SCC = (S0.u != S1.u).
    void
    Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_gt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_GT_U32

    Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32()
    {
    } // ~Inst_SOPC__S_CMP_GT_U32

    // SCC = (S0.u > S1.u).
    void
    Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_ge_u32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_GE_U32

    Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32()
    {
    } // ~Inst_SOPC__S_CMP_GE_U32

    // SCC = (S0.u >= S1.u).
    void
    Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LT_U32

    Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32()
    {
    } // ~Inst_SOPC__S_CMP_LT_U32

    // SCC = (S0.u < S1.u).
    void
    Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_le_u32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LE_U32

    Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32()
    {
    } // ~Inst_SOPC__S_CMP_LE_U32

    // SCC = (S0.u <= S1.u).
    void
    Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute
    Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp0_b32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_BITCMP0_B32

    Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32()
    {
    } // ~Inst_SOPC__S_BITCMP0_B32

    // SCC = (S0.u[S1.u[4:0]] == 0).
    void
    Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b32")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_BITCMP1_B32

    Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32()
    {
    } // ~Inst_SOPC__S_BITCMP1_B32

    // SCC = (S0.u[S1.u[4:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp0_b64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_BITCMP0_B64

    Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64()
    {
    } // ~Inst_SOPC__S_BITCMP0_B64

    // SCC = (S0.u64[S1.u[5:0]] == 0).
    void
    Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_BITCMP1_B64

    Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64()
    {
    } // ~Inst_SOPC__S_BITCMP1_B64

    // SCC = (S0.u64[S1.u[5:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    } // execute
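    // Illustrative example (added; not in the original source): with
    // S0.u = 0x00000010 and S1.u = 4, s_bitcmp1_b32 sets SCC = 1 because
    // bit 4 is set, while s_bitcmp0_b32 on the same operands sets SCC = 0.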
    Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_setvskip")
    {
        setFlag(UnconditionalJump);
    } // Inst_SOPC__S_SETVSKIP

    Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP()
    {
    } // ~Inst_SOPC__S_SETVSKIP

    // VSKIP = S0.u[S1.u[4:0]].
    // Enables and disables VSKIP mode.
    // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instructions
    // are issued.
    void
    Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_set_gpr_idx_on")
    {
    } // Inst_SOPC__S_SET_GPR_IDX_ON

    Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON()
    {
    } // ~Inst_SOPC__S_SET_GPR_IDX_ON

    // MODE.gpr_idx_en = 1;
    // M0[7:0] = S0.u[7:0];
    // M0[15:12] = SIMM4 (direct contents of S1 field);
    // Remaining bits of M0 are unmodified.
    // Enable GPR indexing mode. Vector operations after this will perform
    // relative GPR addressing based on the contents of M0.
    // The raw contents of the S1 field are read and used to set the enable
    // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and
    // S1[3] = VDST_REL.
    void
    Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_u64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_EQ_U64

    Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64()
    {
    } // ~Inst_SOPC__S_CMP_EQ_U64

    // SCC = (S0.i64 == S1.i64).
    void
    Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute

    Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_u64")
    {
        setFlag(ALU);
    } // Inst_SOPC__S_CMP_LG_U64

    Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64()
    {
    } // ~Inst_SOPC__S_CMP_LG_U64

    // SCC = (S0.i64 != S1.i64).
    void
    Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    } // execute
    Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_nop")
    {
        setFlag(Nop);
    } // Inst_SOPP__S_NOP

    Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP()
    {
    } // ~Inst_SOPP__S_NOP

    // Do nothing.
    void
    Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute
    Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm")
    {
        setFlag(EndOfKernel);
    } // Inst_SOPP__S_ENDPGM

    Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM()
    {
    } // ~Inst_SOPP__S_ENDPGM

    // End of program; terminate wavefront.
    void
    Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        // delete extra instructions fetched for completed work-items
        wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
            wf->instructionBuffer.end());

        if (wf->pendingFetch) {
            wf->dropFetch = true;
        }

        wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
            .flushBuf(wf->wfSlotId);
        wf->setStatus(Wavefront::S_STOPPED);

        int refCount = wf->computeUnit->getLds()
            .decreaseRefCounter(wf->dispatchId, wf->wgId);

        /**
         * The parent WF of this instruction is exiting, therefore
         * it should not participate in this barrier any longer. This
         * prevents possible deadlock issues if WFs exit early.
         */
        int bar_id = WFBarrier::InvalidID;
        if (wf->hasBarrier()) {
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            bar_id = wf->barrierId();
            assert(bar_id != WFBarrier::InvalidID);
            wf->releaseBarrier();
            cu->decMaxBarrierCnt(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
                    "program and decrementing max barrier count for "
                    "barrier Id%d. New max count: %d.\n", cu->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
                    cu->maxBarrierCnt(bar_id));
        }

        DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
            wf->computeUnit->cu_id, wf->wgId, refCount);

        wf->computeUnit->registerManager->freeRegisters(wf);
        wf->computeUnit->completedWfs++;
        wf->computeUnit->activeWaves--;

        panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
            "than zero\n", wf->computeUnit->cu_id);

        DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
            wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);

        for (int i = 0; i < wf->vecReads.size(); i++) {
            if (wf->rawDist.find(i) != wf->rawDist.end()) {
                wf->readsPerWrite.sample(wf->vecReads.at(i));
            }
        }
        wf->vecReads.clear();
        wf->rawDist.clear();
        wf->lastInstExec = 0;

        if (!refCount) {
            /**
             * If all WFs have finished, and hence the WG has finished,
             * then we can free up the barrier belonging to the parent
             * WG, but only if we actually used a barrier (i.e., more
             * than one WF in the WG).
             */
            if (bar_id != WFBarrier::InvalidID) {
                DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
                        "now complete. Releasing barrier Id%d.\n", cu->cu_id,
                        wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id);
                cu->releaseBarrier(bar_id);
            }

            /**
             * Last wavefront of the workgroup has executed return. If the
             * workgroup is not the final one in the kernel, then simply
             * retire it; however, if it is the final one (i.e., indicating
             * the kernel end) then release operation is needed.
             */

            // check whether the workgroup is indicating the kernel end (i.e.,
            // the last workgroup in the kernel).
            bool kernelEnd =
                wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
            // further check whether 'release @ kernel end' is needed
            bool relNeeded =
                wf->computeUnit->shader->impl_kern_end_rel;

            // if not a kernel end or no release needed, retire the workgroup
            // directly
            if (!kernelEnd || !relNeeded) {
                wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
                wf->setStatus(Wavefront::S_STOPPED);
                wf->computeUnit->completedWGs++;

                return;
            }

            /**
             * If a kernel end and release needed, inject a memory sync and
             * retire the workgroup after receiving all acks.
             */
            setFlag(MemSync);
            setFlag(GlobalSegment);
            // Notify Memory System of Kernel Completion
            wf->setStatus(Wavefront::S_RETURNING);
            gpuDynInst->simdId = wf->simdId;
            gpuDynInst->wfSlotId = wf->wfSlotId;
            gpuDynInst->wfDynId = wf->wfDynId;

            DPRINTF(GPUExec, "inject global memory fence for CU%d: "
                    "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId);

            // call shader to prepare the flush operations
            wf->computeUnit->shader->prepareFlush(gpuDynInst);

            wf->computeUnit->completedWGs++;
        } else {
            wf->computeUnit->shader->dispatcher().scheduleDispatch();
        }
    } // execute
    Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_branch")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_BRANCH

    Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
    {
    } // ~Inst_SOPP__S_BRANCH

    // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
    void
    Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;

        wf->pc(pc);
    } // execute
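    // Illustrative example (added; not in the original source): SIMM16 is a
    // signed dword offset relative to the next instruction, so SIMM16 = -1
    // branches back to the s_branch itself (PC + 4 - 4), while SIMM16 = 3
    // skips the next three dwords of code.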
    Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_wakeup")
    {
    } // Inst_SOPP__S_WAKEUP

    Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
    {
    } // ~Inst_SOPP__S_WAKEUP

    // Allow a wave to wakeup all the other waves in its workgroup to force
    // them to wake up immediately from an S_SLEEP instruction. The wakeup is
    // ignored if the waves are not sleeping.
    void
    Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc0")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_SCC0

    Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC0

    // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (!scc.rawData()) {
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute

    Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc1")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_SCC1

    Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC1

    // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (scc.rawData()) {
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute

    Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccz")
    {
        setFlag(Branch);
        setFlag(ReadsVCC);
    } // Inst_SOPP__S_CBRANCH_VCCZ

    Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCZ

    // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        vcc.read();

        if (!vcc.rawData()) {
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute
    Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccnz")
    {
        setFlag(Branch);
        setFlag(ReadsVCC);
    } // Inst_SOPP__S_CBRANCH_VCCNZ

    Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCNZ

    // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
        Addr pc = wf->pc();

        vcc.read();

        if (vcc.rawData()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute

    Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execz")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_EXECZ

    Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECZ

    // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();

        if (wf->execMask().none()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute

    Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execnz")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_EXECNZ

    Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECNZ

    // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    // else NOP.
    void
    Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();

        if (wf->execMask().any()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
        }

        wf->pc(pc);
    } // execute
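    // Usage note (added; not in the original source): compilers typically
    // pair s_cbranch_execz with the *_saveexec instructions defined earlier
    // in this file -- after narrowing EXEC to one side of a divergent
    // branch, the whole region can be skipped when no lane remains active.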
    Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_barrier")
    {
        setFlag(MemBarrier);
    } // Inst_SOPP__S_BARRIER

    Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER()
    {
    } // ~Inst_SOPP__S_BARRIER

    /**
     * Synchronize waves within a workgroup. If not all waves of the workgroup
     * have been created yet, wait for entire group before proceeding. If some
     * waves in the workgroup have already terminated, this waits on only the
     * remaining waves.
     */
    void
    Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        if (wf->hasBarrier()) {
            int bar_id = wf->barrierId();
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            wf->setStatus(Wavefront::S_BARRIER);
            cu->incNumAtBarrier(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
                    "barrier Id%d. %d waves now at barrier, %d waves "
                    "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
                    wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
                    cu->numYetToReachBarrier(bar_id));
        }
    } // execute
    Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setkill")
    {
    } // Inst_SOPP__S_SETKILL

    Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
    {
    } // ~Inst_SOPP__S_SETKILL

    void
    Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_waitcnt")
    {
        setFlag(ALU);
        setFlag(Waitcnt);
    } // Inst_SOPP__S_WAITCNT

    Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
    {
    } // ~Inst_SOPP__S_WAITCNT

    // Wait for the counts of outstanding lds, vector-memory and
    // export/vmem-write-data to be at or below the specified levels.
    // SIMM16[3:0] = vmcount (vector memory operations),
    // SIMM16[6:4] = export/mem-write-data count,
    // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
    void
    Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 vm_cnt = 0;
        ScalarRegI32 exp_cnt = 0;
        ScalarRegI32 lgkm_cnt = 0;
        vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0);
        exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4);
        lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8);
        gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
    } // execute
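    // Illustrative example (added; not in the original source): SIMM16 = 0
    // waits for all three counters to drain; an encoding such as 0x1F70
    // keeps exp_cnt (7) and lgkm_cnt (31) at their maxima, so only
    // vm_cnt = 0 -- i.e., all outstanding vector-memory operations -- is
    // actually waited on.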
    Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sethalt")
    {
    } // Inst_SOPP__S_SETHALT

    Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
    {
    } // ~Inst_SOPP__S_SETHALT

    void
    Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sleep")
    {
    } // Inst_SOPP__S_SLEEP

    Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
    {
    } // ~Inst_SOPP__S_SLEEP

    // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
    void
    Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setprio")
    {
    } // Inst_SOPP__S_SETPRIO

    Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
    {
    } // ~Inst_SOPP__S_SETPRIO

    // User settable wave priority is set to SIMM16[1:0]. 0 = lowest,
    // 3 = highest.
    void
    Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsg")
    {
    } // Inst_SOPP__S_SENDMSG

    Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
    {
    } // ~Inst_SOPP__S_SENDMSG

    void
    Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsghalt")
    {
    } // Inst_SOPP__S_SENDMSGHALT

    Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT()
    {
    } // ~Inst_SOPP__S_SENDMSGHALT

    void
    Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_trap")
    {
    } // Inst_SOPP__S_TRAP

    Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP()
    {
    } // ~Inst_SOPP__S_TRAP

    // Enter the trap handler.
    void
    Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_icache_inv")
    {
    } // Inst_SOPP__S_ICACHE_INV

    Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
    {
    } // ~Inst_SOPP__S_ICACHE_INV

    // Invalidate entire L1 instruction cache.
    void
    Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_incperflevel")
    {
    } // Inst_SOPP__S_INCPERFLEVEL

    Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
    {
    } // ~Inst_SOPP__S_INCPERFLEVEL

    void
    Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_decperflevel")
    {
    } // Inst_SOPP__S_DECPERFLEVEL

    Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
    {
    } // ~Inst_SOPP__S_DECPERFLEVEL

    void
    Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_ttracedata")
    {
    } // Inst_SOPP__S_TTRACEDATA

    Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
    {
    } // ~Inst_SOPP__S_TTRACEDATA

    void
    Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
        InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGSYS

    Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
        InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGUSER

    Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGUSER

    void
    Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(
        InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
    ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
    {
        setFlag(Branch);
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
    ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm_saved")
    {
    } // Inst_SOPP__S_ENDPGM_SAVED

    Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
    {
    } // ~Inst_SOPP__S_ENDPGM_SAVED

    void
    Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
        InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_OFF

    Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_OFF

    // MODE.gpr_idx_en = 0.
    // Clear GPR indexing mode. Vector operations after this will not perform
    // relative GPR addressing regardless of the contents of M0.
    void
    Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
        InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_MODE

    Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_MODE

    // M0[15:12] = SIMM4.
    // Modify the mode used for vector GPR indexing.
    // The raw contents of the source field are read and used to set the enable
    // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL
    // and SIMM4[3] = VDST_REL.
    void
    Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORD

    Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_LOAD_DWORD

    /**
     * Read 1 dword from scalar data cache. If the offset is specified as an
     * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are
     * ignored). If the offset is specified as an immediate 20-bit constant,
     * the constant is an unsigned byte offset.
     */
    void
    Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
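    // Illustrative example (added; not in the original source): SBASE is an
    // aligned SGPR-pair index, so SBASE = 2 names the 64-bit base address
    // held in s[4:5] (hence the << 1 above); with IMM = 1 and
    // OFFSET = 0x40, the dword is fetched from base + 64 bytes.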
    Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX2

    Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX2

    /**
     * Read 2 dwords from scalar data cache. See s_load_dword for details on
     * the offset input.
     */
    void
    Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc

    Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX4

    Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX4

    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX8

    Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX8

    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc

    Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_LOAD_DWORDX16

    Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX16

    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
        InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORD

    Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD

    // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 1 request, size 32
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // use U64 because 2 requests, each size 32
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 4 requests, each size 32
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 8 requests, each size 32
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 16 requests, each size 32
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORD

    Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_STORE_DWORD

    // Write 1 dword to scalar data cache.
    // If the offset is specified as an SGPR, the SGPR contains an unsigned
    // BYTE offset (the 2 LSBs are ignored).
    // If the offset is specified as an immediate 20-bit constant, the
    // constant is an unsigned BYTE offset.
    void
    Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU32));
        initMemWrite<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
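    // For the scalar stores, initiateAcc() stages the source register's raw
    // bytes into gpuDynInst->scalar_data with std::memcpy before
    // initMemWrite<N> issues the write; nothing is left to commit on the
    // way back, so completeAcc() is empty.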
    Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX2

    Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX2

    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU64));
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX4

    Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX4

    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    4 * sizeof(ScalarRegU32));
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORD

    Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORD

    // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX2

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2

    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX4

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4

    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv")
    {
    } // Inst_SMEM__S_DCACHE_INV

    Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
    {
    } // ~Inst_SMEM__S_DCACHE_INV

    // Invalidate the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb")
    {
    } // Inst_SMEM__S_DCACHE_WB

    Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
    {
    } // ~Inst_SMEM__S_DCACHE_WB

    // Write back dirty data in the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv_vol")
    {
    } // Inst_SMEM__S_DCACHE_INV_VOL

    Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_INV_VOL

    // Invalidate the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb_vol")
    {
    } // Inst_SMEM__S_DCACHE_WB_VOL

    Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_WB_VOL

    // Write back dirty data in the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memtime")
    {
    } // Inst_SMEM__S_MEMTIME

    Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
    {
    } // ~Inst_SMEM__S_MEMTIME

    // Return current 64-bit timestamp.
    void
    Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memrealtime")
    {
    } // Inst_SMEM__S_MEMREALTIME

    Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
    {
    } // ~Inst_SMEM__S_MEMREALTIME

    // Return current 64-bit RTC.
    void
    Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe")
    {
    } // Inst_SMEM__S_ATC_PROBE

    Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
    {
    } // ~Inst_SMEM__S_ATC_PROBE

    void
    Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe_buffer")
    {
    } // Inst_SMEM__S_ATC_PROBE_BUFFER

    Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
    {
    } // ~Inst_SMEM__S_ATC_PROBE_BUFFER

    void
    Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_cndmask_b32")
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_CNDMASK_B32

    Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
    {
    } // ~Inst_VOP2__V_CNDMASK_B32

    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    void
    Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
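    // VCC is a 64-bit scalar mask with one bit per lane; each active lane
    // selects src1 when its VCC bit is set and src0 otherwise. For example
    // (illustrative value), with vcc.rawData() == 0x5, lanes 0 and 2 take
    // src1 while lane 1 takes src0.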
    Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_ADD_F32

    Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
    {
    } // ~Inst_VOP2__V_ADD_F32

    // D.f = S0.f + S1.f.
    void
    Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: %#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] + src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
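    // When the instruction carries a DPP (data-parallel primitive) control
    // word, src0 is re-read through the DPP-selected register and
    // processDPP() applies the cross-lane permutation plus the ABS/NEG
    // input modifiers before the per-lane add; the other branch is the
    // plain element-wise sum.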
    Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUB_F32

    Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
    {
    } // ~Inst_VOP2__V_SUB_F32

    // D.f = S0.f - S1.f.
    void
    Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUBREV_F32

    Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
    {
    } // ~Inst_VOP2__V_SUBREV_F32

    // D.f = S1.f - S0.f.
    void
    Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_LEGACY_F32

    Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP2__V_MUL_LEGACY_F32

    // D.f = S0.f * S1.f
    void
    Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_F32

    Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
    {
    } // ~Inst_VOP2__V_MUL_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
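    // The case analysis above hand-implements the edge-case table for this
    // multiply: NaN inputs propagate NaN, a zero or denormal times infinity
    // is NaN, a zero or denormal times a finite value is flushed to a zero
    // whose sign is the XOR of the input signs, and infinity times a finite
    // nonzero value yields a correspondingly signed infinity; only the
    // remaining ordinary cases fall through to src0 * src1.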
    Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_I32_I24

    Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0].
    void
    Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_I32_I24

    Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_HI_I32_I24

    // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
    void
    Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
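    // Worked example: with both 24-bit fields at the signed minimum
    // (sext<24> of 0x800000 is -8388608), the widened 64-bit product is
    // 2^46 = 0x400000000000, so the high 32 bits written back are 0x4000.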
    Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_U32_U24

    Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0].
    void
    Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0_sdwa[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                }
            }
        }

        vdst.write();
    } // execute
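    // The SDWA (sub-dword addressing) path keeps untouched copies
    // (origSrc0_sdwa, origSrc1, origVdst) alongside the working operands:
    // processSDWA_src() rewrites the sources according to the byte/word
    // select fields, the loop computes into vdst while mirroring results
    // into origVdst, and processSDWA_dst() then merges the selected
    // destination sub-dword back using those originals.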
    Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_U32_U24

    Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_HI_U32_U24

    // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
    void
    Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
                VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);

                vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MIN_F32

    Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
    {
    } // ~Inst_VOP2__V_MIN_F32

    // D.f = (S0.f < S1.f ? S0.f : S1.f).
    void
    Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MAX_F32

    Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
    {
    } // ~Inst_VOP2__V_MAX_F32

    // D.f = (S0.f >= S1.f ? S0.f : S1.f).
    void
    Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I32

    Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
    {
    } // ~Inst_VOP2__V_MIN_I32

    // D.i = min(S0.i, S1.i).
    void
    Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I32

    Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
    {
    } // ~Inst_VOP2__V_MAX_I32

    // D.i = max(S0.i, S1.i).
    void
    Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U32

    Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
    {
    } // ~Inst_VOP2__V_MIN_U32

    // D.u = min(S0.u, S1.u).
    void
    Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U32

    Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
    {
    } // ~Inst_VOP2__V_MAX_U32

    // D.u = max(S0.u, S1.u).
    void
    Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B32

    Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
    {
    } // ~Inst_VOP2__V_LSHRREV_B32

    // D.u = S1.u >> S0.u[4:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
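    // The "rev" in these shifts means the operands are reversed relative to
    // the non-rev forms: the shift amount comes from SRC0 (which in VOP2
    // may be an SGPR or inline constant) and the value being shifted from
    // VSRC1. Only the low 5 bits of the count matter for 32-bit shifts,
    // hence bits(src0[lane], 4, 0).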
    Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I32

    Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
    {
    } // ~Inst_VOP2__V_ASHRREV_I32

    // D.i = signext(S1.i) >> S0.i[4:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B32

    Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
    {
    } // ~Inst_VOP2__V_LSHLREV_B32

    // D.u = S1.u << S0.u[4:0].
    void
    Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and vdst during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_and_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_AND_B32

    Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
    {
    } // ~Inst_VOP2__V_AND_B32

    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] & src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_or_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_OR_B32

    Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
    {
    } // ~Inst_VOP2__V_OR_B32

    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] | src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] | src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_xor_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_XOR_B32

    Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
    {
    } // ~Inst_VOP2__V_XOR_B32

    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F32

    Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
    {
    } // ~Inst_VOP2__V_MAC_F32

    // D.f = S0.f * S1.f + D.f.
    void
    Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();
        vdst.read();

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: %#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
                                          vdst[lane]);
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F32

    Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
    {
    } // ~Inst_VOP2__V_MADMK_F32

    // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
    // This opcode cannot use the input/output modifiers.
    void
    Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], k, src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F32

    Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
    {
    } // ~Inst_VOP2__V_MADAK_F32

    // D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
    // This opcode cannot use input/output modifiers.
    void
    Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], k);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_ADD_U32

    Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
    {
    } // ~Inst_VOP2__V_ADD_U32

    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] + src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                    vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                    vcc.setBit(lane, ((VecElemU64)src0[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }
        }

        vdst.write();
        vcc.write();
    } // execute
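    // The per-lane carry-out is computed by redoing the add in 64 bits and
    // comparing against 2^32. For example, 0xffffffff + 0x1 gives
    // vdst[lane] == 0 with the lane's VCC bit set, since the widened sum
    // 0x100000000 reaches the threshold.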
    Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUB_U32

    Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
    {
    } // ~Inst_VOP2__V_SUB_U32

    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute

    Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUBREV_U32

    Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
    {
    } // ~Inst_VOP2__V_SUBREV_U32

    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_addc_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_ADDC_U32

    Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32()
    {
    } // ~Inst_VOP2__V_ADDC_U32

    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                vcc.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                    + (VecElemU64)bits(vcc.rawData(), lane, lane))
                        >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
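    // V_ADD_U32 and V_ADDC_U32 pair up for multi-precision arithmetic: the
    // first add produces the low dword and a carry in VCC, and this
    // instruction folds bits(vcc.rawData(), lane) into the high-dword sum
    // while generating the next carry, so a 64-bit add takes two VOP2
    // instructions per lane.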
    Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subb_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBB_U32

    Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32()
    {
    } // ~Inst_VOP2__V_SUBB_U32

    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute

    Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subbrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBBREV_U32

    Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32()
    {
    } // ~Inst_VOP2__V_SUBBREV_U32

    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
                    > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_ADD_F16

    Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
    {
    } // ~Inst_VOP2__V_ADD_F16

    // D.f16 = S0.f16 + S1.f16.
    void
    Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUB_F16

    Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
    {
    } // ~Inst_VOP2__V_SUB_F16

    // D.f16 = S0.f16 - S1.f16.
    void
    Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUBREV_F16

    Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
    {
    } // ~Inst_VOP2__V_SUBREV_F16

    // D.f16 = S1.f16 - S0.f16.
    void
    Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MUL_F16

    Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
    {
    } // ~Inst_VOP2__V_MUL_F16

    // D.f16 = S0.f16 * S1.f16.
    void
    Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F16

    Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
    {
    } // ~Inst_VOP2__V_MAC_F16

    // D.f16 = S0.f16 * S1.f16 + D.f16.
    void
    Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F16

    Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
    {
    } // ~Inst_VOP2__V_MADMK_F16

    // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers.
    void
    Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F16

    Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
    {
    } // ~Inst_VOP2__V_MADAK_F16

    // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers.
    void
    Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ADD_U16

    Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
    {
    } // ~Inst_VOP2__V_ADD_U16

    // D.u16 = S0.u16 + S1.u16.
    void
    Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUB_U16

    Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
    {
    } // ~Inst_VOP2__V_SUB_U16

    // D.u16 = S0.u16 - S1.u16.
    void
    Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUBREV_U16

    Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
    {
    } // ~Inst_VOP2__V_SUBREV_U16

    // D.u16 = S1.u16 - S0.u16.
    void
    Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_lo_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_LO_U16

    Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
    {
    } // ~Inst_VOP2__V_MUL_LO_U16

    // D.u16 = S0.u16 * S1.u16.
    void
    Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B16

    Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
    {
    } // ~Inst_VOP2__V_LSHLREV_B16

    // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
    void
    Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
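    // For 16-bit shifts only the low 4 bits of the shift count are
    // significant, hence bits(src0[lane], 3, 0) here, versus the 5-bit mask
    // used by the 32-bit shift variants above. The >> and arithmetic->>
    // 16-bit variants that follow shift by the full src0 value and rely on
    // the count already being in range rather than masking it.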
    Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B16

    Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
    {
    } // ~Inst_VOP2__V_LSHRREV_B16

    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I16

    Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
    {
    } // ~Inst_VOP2__V_ASHRREV_I16

    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute

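    // Note: unlike V_LSHLREV_B16 above, these two right-shift
    // implementations use the full value of src0 as the shift amount
    // rather than masking it to bits [3:0] as the ISA comments describe,
    // so shift amounts above 15 zero/sign-fill the entire result. This
    // appears to be an upstream quirk of the model rather than intended
    // hardware behavior.
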
    Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MAX_F16

    Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
    {
    } // ~Inst_VOP2__V_MAX_F16

    // D.f16 = max(S0.f16, S1.f16).
    void
    Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MIN_F16

    Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
    {
    } // ~Inst_VOP2__V_MIN_F16

    // D.f16 = min(S0.f16, S1.f16).
    void
    Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U16

    Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
    {
    } // ~Inst_VOP2__V_MAX_U16

    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I16

    Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
    {
    } // ~Inst_VOP2__V_MAX_I16

    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U16

    Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
    {
    } // ~Inst_VOP2__V_MIN_U16

    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I16

    Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
    {
    } // ~Inst_VOP2__V_MIN_I16

    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ldexp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_LDEXP_F16

    Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
    {
    } // ~Inst_VOP2__V_LDEXP_F16

    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_nop")
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP1__V_NOP

    Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP()
    {
    } // ~Inst_VOP1__V_NOP

    // Do nothing.
    void
    Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute

    Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_B32

    Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32()
    {
    } // ~Inst_VOP1__V_MOV_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (isDPPInst()) {
            VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src_dpp.read();

            DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
            // to negate it or take the absolute value of it
            assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
            assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src_dpp[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src[lane];
                }
            }
        }

        vdst.write();
    } // execute

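    // Note on the DPP path above: processDPP() (from inst_util.hh)
    // rewrites src_dpp in place, permuting the lane values according to
    // DPP_CTRL (row shifts/rotates, lane broadcasts, etc.) and applying
    // BOUND_CTRL plus the bank/row masks, so the per-lane loop that
    // follows it is an ordinary move of the already-swizzled data.
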
    Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_readfirstlane_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_READFIRSTLANE_B32

    Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32()
    {
    } // ~Inst_VOP1__V_READFIRSTLANE_B32

    // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
    // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
    // (Lane# = 0 if exec is zero). Ignores exec mask for the access.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarRegI32 src_lane(0);
        ScalarRegU64 exec_mask = wf->execMask().to_ullong();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (exec_mask) {
            src_lane = findLsbSet(exec_mask);
        }

        sdst = src[src_lane];

        sdst.write();
    } // execute

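    // Note: src_lane is initialized to 0, so when EXEC is all zeroes the
    // instruction still copies lane 0, matching the "Lane# = 0 if exec is
    // zero" rule in the comment above; the copy itself ignores the mask.
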
    Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_I32_F64

    Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
    {
    } // ~Inst_VOP1__V_CVT_I32_F64

    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

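    // Note: std::frexp() returns exp such that |src| = m * 2^exp with
    // m in [0.5, 1), so the exp > 30 test fires once |src| >= 2^30. That
    // is a conservative (factor-of-two early) guard in front of the plain
    // cast below, which would be undefined behavior for values outside
    // the int32 range.
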
    Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_i32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_I32

    Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
    {
    } // ~Inst_VOP1__V_CVT_F64_I32

    // D.d = (double)S0.i.
    void
    Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_i32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_I32

    Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
    {
    } // ~Inst_VOP1__V_CVT_F32_I32

    // D.f = (float)S0.i.
    void
    Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_u32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_U32

    Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
    {
    } // ~Inst_VOP1__V_CVT_F32_U32

    // D.f = (float)S0.u.
    void
    Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_U32_F32

    Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
    {
    } // ~Inst_VOP1__V_CVT_U32_F32

    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

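    // Note: only -inf is explicitly clamped to 0 here; negative finite
    // inputs fall through to the raw cast. +inf and exp > 31 (i.e.
    // |src| >= 2^31, a conservative bound for the uint32 range) both
    // saturate to UINT_MAX, per the out-of-range rule quoted above.
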
    Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_I32_F32

    Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_I32_F32

    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_fed_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_FED_B32

    Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
    {
    } // ~Inst_VOP1__V_MOV_FED_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_f32")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_F32

    Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
    {
    } // ~Inst_VOP1__V_CVT_F16_F32

    // D.f16 = flt32_to_flt16(S0.f).
    void
    Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F32_F16

    Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
    {
    } // ~Inst_VOP1__V_CVT_F32_F16

    // D.f = flt16_to_flt32(S0.f16).
    void
    Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_RPI_I32_F32

    Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_RPI_I32_F32

    // D.i = (int)floor(S0.f + 0.5).
    void
    Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute

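    // Note: floor(x + 0.5) rounds ties toward +infinity (e.g. -1.5 -> -1
    // but 1.5 -> 2), consistent with the "RPI" mnemonic, as opposed to
    // the round-half-away-from-zero behavior of std::round().
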
    Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_FLR_I32_F32

    Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_FLR_I32_F32

    // D.i = (int)floor(S0.f).
    void
    Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_OFF_F32_I4

    Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP1__V_CVT_OFF_F32_I4

    // 4-bit signed int to 32-bit float.
    void
    Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F32_F64

    Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
    {
    } // ~Inst_VOP1__V_CVT_F32_F64

    // D.f = (float)S0.d.
    void
    Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_f32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_F32

    Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32()
    {
    } // ~Inst_VOP1__V_CVT_F64_F32

    // D.d = (double)S0.f.
    void
    Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE0

    Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE0

    // D.f = (float)(S0.u[7:0]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
            }
        }

        vdst.write();
    } // execute

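    // Note: the four V_CVT_F32_UBYTE{0..3} variants each extract one byte
    // of the 32-bit source and convert it to float, e.g. for unpacking
    // packed 8-bit channel data one byte at a time.
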
    Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE1

    Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE1

    // D.f = (float)(S0.u[15:8]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE2

    Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE2

    // D.f = (float)(S0.u[23:16]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE3

    Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE3

    // D.f = (float)(S0.u[31:24]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_U32_F64

    Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64()
    {
    } // ~Inst_VOP1__V_CVT_U32_F64

    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane],&exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_u32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_U32

    Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
    {
    } // ~Inst_VOP1__V_CVT_F64_U32

    // D.d = (double)S0.u.
    void
    Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_TRUNC_F64

    Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
    {
    } // ~Inst_VOP1__V_TRUNC_F64

    // D.d = trunc(S0.d), return integer part of S0.d.
    void
    Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CEIL_F64

    Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
    {
    } // ~Inst_VOP1__V_CEIL_F64

    // D.d = ceil(S0.d);
    void
    Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RNDNE_F64

    Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
    {
    } // ~Inst_VOP1__V_RNDNE_F64

    // D.d = round_nearest_even(S0.d).
    void
    Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

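    // Note: roundNearestEven() (from inst_util.hh) implements the IEEE
    // round-to-nearest-even tie-breaking rule (e.g. 2.5 -> 2, 3.5 -> 4),
    // which std::round() does not provide since it rounds ties away from
    // zero.
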
    Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FLOOR_F64

    Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
    {
    } // ~Inst_VOP1__V_FLOOR_F64

    // D.d = floor(S0.d);
    void
    Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FRACT_F32

    Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
    {
    } // ~Inst_VOP1__V_FRACT_F32

    // D.f = modf(S0.f).
    void
    Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_TRUNC_F32

    Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32()
    {
    } // ~Inst_VOP1__V_TRUNC_F32

    // D.f = trunc(S0.f), return integer part of S0.f.
    void
    Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CEIL_F32

    Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32()
    {
    } // ~Inst_VOP1__V_CEIL_F32

    // D.f = ceil(S0.f);
    void
    Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RNDNE_F32

    Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
    {
    } // ~Inst_VOP1__V_RNDNE_F32

    // D.f = round_nearest_even(S0.f).
    void
    Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FLOOR_F32

    Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
    {
    } // ~Inst_VOP1__V_FLOOR_F32

    // D.f = floor(S0.f);
    void
    Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_F32

    Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
    {
    } // ~Inst_VOP1__V_EXP_F32

    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_F32

    Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
    {
    } // ~Inst_VOP1__V_LOG_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_F32

    Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
    {
    } // ~Inst_VOP1__V_RCP_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_IFLAG_F32

    Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP1__V_RCP_IFLAG_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RSQ_F32

    Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32()
    {
    } // ~Inst_VOP1__V_RSQ_F32

    // D.f = 1.0 / sqrt(S0.f).
    void
    Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RCP_F64

    Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64()
    {
    } // ~Inst_VOP1__V_RCP_F64

    // D.d = 1.0 / S0.d.
    void
    Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute

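    // Note: the special-case ladder above follows IEEE-754 reciprocal
    // semantics: 1/(+-0) -> +inf (the sign of zero is not propagated
    // here), 1/NaN -> NaN, and 1/(+-inf) -> +-0 with the sign preserved;
    // only finite nonzero inputs reach the actual division.
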
    Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RSQ_F64

    Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64()
    {
    } // ~Inst_VOP1__V_RSQ_F64

    // D.d = 1.0 / sqrt(S0.d).
    void
    Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])
                           && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SQRT_F32

    Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
    {
    } // ~Inst_VOP1__V_SQRT_F32

    // D.f = sqrt(S0.f).
    void
    Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_SQRT_F64

    Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
    {
    } // ~Inst_VOP1__V_SQRT_F64

    // D.d = sqrt(S0.d).
    void
    Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sin_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SIN_F32

    Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
    {
    } // ~Inst_VOP1__V_SIN_F32

    // D.f = sin(S0.f * 2 * PI).
    void
    Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (src[lane] < -256.0 || src[lane] > 256.0) {
                    vdst[lane] = 0.0;
                } else {
                    vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
                }
            }
        }

        vdst.write();
    } // execute

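    // Note: the argument is in revolutions (the multiply by 2*pi converts
    // it to radians), and inputs outside [-256.0, 256.0] produce 0.0 in
    // the branch above, presumably modeling the limited range reduction
    // of the hardware transcendental unit.
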
    Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cos_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_COS_F32

    Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
    {
    } // ~Inst_VOP1__V_COS_F32

    // D.f = cos(S0.f * 2 * PI).
    void
    Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (src[lane] < -256.0 || src[lane] > 256.0) {
                    vdst[lane] = 0.0;
                } else {
                    vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_not_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_NOT_B32

    Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32()
    {
    } // ~Inst_VOP1__V_NOT_B32

    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ~src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_bfrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_BFREV_B32

    Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32()
    {
    } // ~Inst_VOP1__V_BFREV_B32

    // D.u[31:0] = S0.u[0:31], bitfield reverse.
    // Input and output modifiers not supported.
    void
    Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = reverseBits(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbh_u32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBH_U32

    Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
    {
    } // ~Inst_VOP1__V_FFBH_U32

    // D.u = position of first 1 in S0.u from MSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOneMsb(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbl_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBL_B32

    Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
    {
    } // ~Inst_VOP1__V_FFBL_B32

    // D.u = position of first 1 in S0.u from LSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOne(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbh_i32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBH_I32

    Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
    {
    } // ~Inst_VOP1__V_FFBH_I32

    // D.u = position of first bit different from sign bit in S0.i from MSB;
    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
    void
    Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = firstOppositeSignBit(src[lane]);
            }
        }

        vdst.write();
    } // execute

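    // Note: these helpers come from inst_util.hh. For example,
    // findFirstOneMsb(0x00800000) returns 8 (bit 23 sits eight positions
    // below bit 31), findFirstOne(0x00800000) returns 23, and all three
    // return 0xffffffff when no qualifying bit exists.
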
    Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FREXP_EXP_I32_F64

    Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I32_F64

    void
    Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FREXP_MANT_F64

    Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F64

    void
    Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FRACT_F64

    Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
    {
    } // ~Inst_VOP1__V_FRACT_F64

    // D.d = modf(S0.d).
    void
    Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF64 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FREXP_EXP_I32_F32

    Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I32_F32

    // frexp(S0.f, Exponent(S0.f))
    // if (S0.f == INF || S0.f == NAN) then D.i = 0;
    // else D.i = Exponent(S0.f);
    void
    Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FREXP_MANT_F32

    Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F32

    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
    // else D.f = frexp(S0.f, Exponent(S0.f)).
    void
    Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute

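    // Note: these frexp instructions expose the C library convention used
    // by std::frexp(), i.e. S0 = mantissa * 2^exponent with |mantissa| in
    // [0.5, 1.0), which appears to match the Mantissa()/Exponent()
    // definitions referenced in the comments above.
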
    Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_clrexcp")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_CLREXCP

    Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
    {
    } // ~Inst_VOP1__V_CLREXCP

    void
    Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_u16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_U16

    Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
    {
    } // ~Inst_VOP1__V_CVT_F16_U16

    // D.f16 = uint16_to_flt16(S.u16).
    void
    Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_i16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_I16

    Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
    {
    } // ~Inst_VOP1__V_CVT_F16_I16

    // D.f16 = int16_to_flt16(S.i16).
    void
    Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_U16_F16

    Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
    {
    } // ~Inst_VOP1__V_CVT_U16_F16

    // D.u16 = flt16_to_uint16(S.f16).
    void
    Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_I16_F16

    Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
    {
    } // ~Inst_VOP1__V_CVT_I16_F16

    // D.i16 = flt16_to_int16(S.f16).
    void
    Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RCP_F16

    Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
    {
    } // ~Inst_VOP1__V_RCP_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / S0.f16;
    void
    Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_SQRT_F16

    Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16()
    {
    } // ~Inst_VOP1__V_SQRT_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = sqrt(S0.f16);
    void
    Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RSQ_F16

    Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16()
    {
    } // ~Inst_VOP1__V_RSQ_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / sqrt(S0.f16);
    void
    Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_LOG_F16

    Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16()
    {
    } // ~Inst_VOP1__V_LOG_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 0.0f;
    // else
    //     D.f16 = log2(S0.f16);
    void
    Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_EXP_F16

    Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16()
    {
    } // ~Inst_VOP1__V_EXP_F16

    // if (S0.f16 == 0.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = pow(2.0, S0.f16).
    void
    Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FREXP_MANT_F16

    Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F16

    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.f16 = S0.f16;
    // else
    //     D.f16 = mantissa(S0.f16).
    void
    Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(
        InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FREXP_EXP_I16_F16

    Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I16_F16

    // frexp(S0.f16, Exponent(S0.f16))
    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.i16 = 0;
    // else
    //     D.i16 = Exponent(S0.f16);
    void
    Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FLOOR_F16

    Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16()
    {
    } // ~Inst_VOP1__V_FLOOR_F16

    // D.f16 = floor(S0.f16);
    void
    Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CEIL_F16

    Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16()
    {
    } // ~Inst_VOP1__V_CEIL_F16

    // D.f16 = ceil(S0.f16);
    void
    Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_TRUNC_F16

    Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16()
    {
    } // ~Inst_VOP1__V_TRUNC_F16

    // D.f16 = trunc(S0.f16).
    void
    Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RNDNE_F16

    Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16()
    {
    } // ~Inst_VOP1__V_RNDNE_F16

    // D.f16 = roundNearestEven(S0.f16);
    void
    Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FRACT_F16

    Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16()
    {
    } // ~Inst_VOP1__V_FRACT_F16

    // D.f16 = S0.f16 + -floor(S0.f16).
    void
    Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sin_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_SIN_F16

    Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16()
    {
    } // ~Inst_VOP1__V_SIN_F16

    // D.f16 = sin(S0.f16 * 2 * PI).
    void
    Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cos_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_COS_F16

    Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16()
    {
    } // ~Inst_VOP1__V_COS_F16

    // D.f16 = cos(S0.f16 * 2 * PI).
    void
    Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_LEGACY_F32

    Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32()
    {
    } // ~Inst_VOP1__V_EXP_LEGACY_F32

    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_LEGACY_F32

    Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
    {
    } // ~Inst_VOP1__V_LOG_LEGACY_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_CLASS_F32

    Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F32

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_class_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_CLASS_F32

    Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32()
    {
    } // ~Inst_VOPC__V_CMPX_CLASS_F32

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f The function reports true if the floating point value is any of
    // the numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

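    // Note: the CMPX variants differ from the plain CMP ones only in the
    // final statement above: after writing VCC they also overwrite the
    // EXEC mask with the comparison result, so subsequent vector
    // instructions execute only on lanes whose value matched the
    // requested classes.
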
    Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_CLASS_F64

    Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F64

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_class_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_CLASS_F64

    Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64()
    {
    } // ~Inst_VOPC__V_CMPX_CLASS_F64

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed
    // on S0.d. The function reports true if the floating point value is any
    // of the numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

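    // Every V_CMPX variant finishes the way the method above does: the
    // freshly computed VCC is copied into the wavefront's EXEC mask, so
    // lanes that fail the compare are masked off for subsequent vector
    // instructions. Since vcc.setBit() is only reached for lanes that are
    // already active, the new mask is a subset of the old one. A minimal
    // sketch of that fold in plain integers (cmpxFoldExec, oldExec and
    // cmpResult are illustrative names, not gem5 API):
    inline uint64_t
    cmpxFoldExec(uint64_t oldExec, uint64_t cmpResult)
    {
        // Equivalent to wf->execMask() = vcc.rawData() whenever cmpResult
        // only has bits set for previously active lanes.
        return oldExec & cmpResult;
    }
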
    Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_CLASS_F16

    Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F16

    // VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f16. The function reports true if the floating point value is any
    // of the numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_class_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_CLASS_F16

    Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOPC__V_CMPX_CLASS_F16

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed
    // on S0.f16. The function reports true if the floating point value is
    // any of the numeric types selected in S1.u according to the following
    // list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_F_F16

    Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16()
    {
    } // ~Inst_VOPC__V_CMP_F_F16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LT_F16

    Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16()
    {
    } // ~Inst_VOPC__V_CMP_LT_F16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

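    // All F16 VOPC compares in this implementation are stubs that call
    // panicUnimplemented(). One plausible implementation, shown here only as
    // a commented sketch, would widen each binary16 source to float and
    // reuse the F32 lane loop; fp16ToF32() is a hypothetical helper that
    // does not exist in this file:
    //
    //     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
    //         if (wf->execMask(lane)) {
    //             float s0 = fp16ToF32(src0[lane]);
    //             float s1 = fp16ToF32(src1[lane]);
    //             vcc.setBit(lane, s0 < s1 ? 1 : 0);
    //         }
    //     }
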
    Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_EQ_F16

    Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_F16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LE_F16

    Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16()
    {
    } // ~Inst_VOPC__V_CMP_LE_F16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_GT_F16

    Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16()
    {
    } // ~Inst_VOPC__V_CMP_GT_F16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LG_F16

    Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16()
    {
    } // ~Inst_VOPC__V_CMP_LG_F16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_GE_F16

    Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16()
    {
    } // ~Inst_VOPC__V_CMP_GE_F16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_o_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_O_F16

    Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16()
    {
    } // ~Inst_VOPC__V_CMP_O_F16

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_u_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_U_F16

    Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16()
    {
    } // ~Inst_VOPC__V_CMP_U_F16

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NGE_F16

    Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16()
    {
    } // ~Inst_VOPC__V_CMP_NGE_F16

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLG_F16

    Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLG_F16

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ngt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NGT_F16

    Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16()
    {
    } // ~Inst_VOPC__V_CMP_NGT_F16

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nle_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLE_F16

    Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLE_F16

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_neq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NEQ_F16

    Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16()
    {
    } // ~Inst_VOPC__V_CMP_NEQ_F16

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLT_F16

    Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLT_F16

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_tru_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_TRU_F16

    Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16()
    {
    } // ~Inst_VOPC__V_CMP_TRU_F16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_F_F16

    Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16()
    {
    } // ~Inst_VOPC__V_CMPX_F_F16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LT_F16

    Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_EQ_F16

    Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LE_F16

    Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_GT_F16

    Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LG_F16

    Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_GE_F16

    Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_O_F16

    Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16()
    {
    } // ~Inst_VOPC__V_CMPX_O_F16

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_U_F16

    Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16()
    {
    } // ~Inst_VOPC__V_CMPX_U_F16

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NGE_F16

    Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F16

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLG_F16

    Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F16

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NGT_F16

    Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F16

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLE_F16

    Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F16

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NEQ_F16

    Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F16

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLT_F16

    Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F16

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_TRU_F16

    Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_F_F32

    Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32()
    {
    } // ~Inst_VOPC__V_CMP_F_F32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LT_F32

    Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32()
    {
    } // ~Inst_VOPC__V_CMP_LT_F32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_EQ_F32

    Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_F32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LE_F32

    Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32()
    {
    } // ~Inst_VOPC__V_CMP_LE_F32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_GT_F32

    Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32()
    {
    } // ~Inst_VOPC__V_CMP_GT_F32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LG_F32

    Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32()
    {
    } // ~Inst_VOPC__V_CMP_LG_F32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

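    // Note that v_cmp_lg_f32 ("less or greater") is an ordered not-equal:
    // (S0 < S1 || S0 > S1) is false when either operand is NaN, while
    // v_cmp_neq_f32 further below evaluates !(S0 == S1), which is true for
    // NaN operands. A self-contained illustration in plain floats (a sketch
    // assuming <cassert> and <limits>; lgVersusNeq is an illustrative name):
    inline void
    lgVersusNeq()
    {
        float nan = std::numeric_limits<float>::quiet_NaN();
        bool lg = (nan < 1.0f) || (nan > 1.0f);  // false: NaN is unordered
        bool neq = !(nan == 1.0f);               // true: NaN != everything
        assert(!lg && neq);
    }
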
    Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_GE_F32

    Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32()
    {
    } // ~Inst_VOPC__V_CMP_GE_F32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_o_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_O_F32

    Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32()
    {
    } // ~Inst_VOPC__V_CMP_O_F32

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_u_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_U_F32

    Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32()
    {
    } // ~Inst_VOPC__V_CMP_U_F32

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

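    // v_cmp_o_f32 and v_cmp_u_f32 are exact complements: for any input pair
    // exactly one of "both operands are numbers" (ordered) and "at least one
    // operand is NaN" (unordered) holds, so over the active lanes the two
    // opcodes produce bitwise-inverted VCC values. A sketch of the two
    // predicates (isOrdered and isUnordered are illustrative names):
    inline bool
    isOrdered(float a, float b)
    {
        return !std::isnan(a) && !std::isnan(b);
    }

    inline bool
    isUnordered(float a, float b)
    {
        return std::isnan(a) || std::isnan(b);  // always == !isOrdered(a, b)
    }
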
    Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NGE_F32

    Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32()
    {
    } // ~Inst_VOPC__V_CMP_NGE_F32

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLG_F32

    Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLG_F32

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

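    // The negated compares (NGE, NLG, NGT, NLE, NLT) invert an ordered test,
    // so each of them evaluates true when an operand is NaN: !(NaN >= x)
    // holds even though (NaN < x) is also false. This is why v_cmp_nge_f32
    // is not interchangeable with v_cmp_lt_f32. A small check (a sketch
    // assuming <cassert> and <limits>; ngeVersusLt is an illustrative name):
    inline void
    ngeVersusLt()
    {
        float nan = std::numeric_limits<float>::quiet_NaN();
        assert(!(nan >= 0.0f));  // v_cmp_nge result bit would be 1
        assert(!(nan < 0.0f));   // v_cmp_lt result bit would be 0
    }
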
    Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ngt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NGT_F32

    Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32()
    {
    } // ~Inst_VOPC__V_CMP_NGT_F32

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nle_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLE_F32

    Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLE_F32

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_neq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NEQ_F32

    Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32()
    {
    } // ~Inst_VOPC__V_CMP_NEQ_F32

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLT_F32

    Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLT_F32

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_tru_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_TRU_F32

    Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32()
    {
    } // ~Inst_VOPC__V_CMP_TRU_F32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

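    // v_cmp_tru_f32 only sets VCC bits for lanes that pass execMask(), so,
    // assuming the operand's local copy starts out zeroed, the value
    // committed to VCC is a copy of the current EXEC mask rather than all
    // ones. A sketch in plain integers (truCompareResult is an illustrative
    // name):
    inline uint64_t
    truCompareResult(uint64_t execMask)
    {
        uint64_t vccBits = 0;
        for (int lane = 0; lane < 64; ++lane) {
            if ((execMask >> lane) & 1) {
                vccBits |= 1ULL << lane;  // the "1" each active lane writes
            }
        }
        return vccBits;  // always equal to execMask
    }
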
    Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_F_F32

    Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32()
    {
    } // ~Inst_VOPC__V_CMPX_F_F32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LT_F32

    Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_EQ_F32

    Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LE_F32

    Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_GT_F32

    Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LG_F32

    Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_GE_F32

    Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_O_F32

    Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32()
    {
    } // ~Inst_VOPC__V_CMPX_O_F32

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_U_F32

    Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32()
    {
    } // ~Inst_VOPC__V_CMPX_U_F32

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NGE_F32

    Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F32

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLG_F32

    Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F32

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NGT_F32

    Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F32

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLE_F32

    Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F32

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NEQ_F32

    Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F32

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLT_F32

    Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F32

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_TRU_F32

    Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_F_F64

    Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64()
    {
    } // ~Inst_VOPC__V_CMP_F_F64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_LT_F64

    Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64()
    {
    } // ~Inst_VOPC__V_CMP_LT_F64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_EQ_F64

    Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_F64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_LE_F64

    Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64()
    {
    } // ~Inst_VOPC__V_CMP_LE_F64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_GT_F64

    Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64()
    {
    } // ~Inst_VOPC__V_CMP_GT_F64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_LG_F64

    Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64()
    {
    } // ~Inst_VOPC__V_CMP_LG_F64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_GE_F64

    Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64()
    {
    } // ~Inst_VOPC__V_CMP_GE_F64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_o_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_O_F64

    Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64()
    {
    } // ~Inst_VOPC__V_CMP_O_F64

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_u_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_U_F64

    Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64()
    {
    } // ~Inst_VOPC__V_CMP_U_F64

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_NGE_F64

    Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64()
    {
    } // ~Inst_VOPC__V_CMP_NGE_F64

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_NLG_F64

    Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64()
    {
    } // ~Inst_VOPC__V_CMP_NLG_F64

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ngt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_NGT_F64

    Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64()
    {
    } // ~Inst_VOPC__V_CMP_NGT_F64

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nle_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_NLE_F64

    Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64()
    {
    } // ~Inst_VOPC__V_CMP_NLE_F64

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_neq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_NEQ_F64

    Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64()
    {
    } // ~Inst_VOPC__V_CMP_NEQ_F64

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_NLT_F64

    Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64()
    {
    } // ~Inst_VOPC__V_CMP_NLT_F64

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_tru_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_TRU_F64

    Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64()
    {
    } // ~Inst_VOPC__V_CMP_TRU_F64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

12007 Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC
*iFmt
)
12008 : Inst_VOPC(iFmt
, "v_cmpx_f_f64")
12012 } // Inst_VOPC__V_CMPX_F_F64
12014 Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64()
12016 } // ~Inst_VOPC__V_CMPX_F_F64
12018 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
12020 Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst
)
12022 Wavefront
*wf
= gpuDynInst
->wavefront();
12023 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
12025 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
12026 if (wf
->execMask(lane
)) {
12027 vcc
.setBit(lane
, 0);
12032 wf
->execMask() = vcc
.rawData();

    Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LT_F64

    Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_EQ_F64

    Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LE_F64

    Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_GT_F64

    Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LG_F64

    Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_GE_F64

    Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_O_F64

    Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64()
    {
    } // ~Inst_VOPC__V_CMPX_O_F64

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
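
    // "o" is the IEEE ordered predicate: true only when neither operand
    // is NaN. std::isnan is the direct C++ equivalent; for illustration:
    //   ordered(a, b)  ==  !std::isnan(a) && !std::isnan(b)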

    Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_U_F64

    Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64()
    {
    } // ~Inst_VOPC__V_CMPX_U_F64

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
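
    // "u" (unordered) is the exact complement of "o": true when at least
    // one operand is NaN. Together the o/u compares let generated code
    // branch on NaN-ness before applying the ordered relational compares.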

    Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NGE_F64

    Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F64

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLG_F64

    Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F64

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NGT_F64

    Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F64

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLE_F64

    Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F64

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NEQ_F64

    Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F64

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLT_F64

    Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F64

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_TRU_F64

    Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I16

    Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16()
    {
    } // ~Inst_VOPC__V_CMP_F_I16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute
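
    // The f ("false") compares never set a bit, but they still only touch
    // the bits of active lanes. They are the degenerate always-false
    // member of the eight-entry compare-op family (f, lt, eq, le, gt,
    // ne/lg, ge, t) that the VOPC encoding enumerates for each type.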

    Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I16

    Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16()
    {
    } // ~Inst_VOPC__V_CMP_LT_I16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
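
    // The I16 operand types carry signed 16-bit elements, so the plain
    // C++ comparison above yields the ISA's signed semantics directly.
    // Illustrative example: a lane holding the bit pattern 0xffff compares
    // as -1, so it satisfies (src0[lane] < 0), unlike the u16 forms below.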

    Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I16

    Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I16

    Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16()
    {
    } // ~Inst_VOPC__V_CMP_LE_I16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I16

    Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16()
    {
    } // ~Inst_VOPC__V_CMP_GT_I16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I16

    Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16()
    {
    } // ~Inst_VOPC__V_CMP_NE_I16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I16

    Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16()
    {
    } // ~Inst_VOPC__V_CMP_GE_I16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I16

    Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16()
    {
    } // ~Inst_VOPC__V_CMP_T_I16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U16

    Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16()
    {
    } // ~Inst_VOPC__V_CMP_F_U16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U16

    Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16()
    {
    } // ~Inst_VOPC__V_CMP_LT_U16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
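
    // Same shape as the i16 compare, but on unsigned 16-bit elements:
    // here a lane holding 0xffff compares as 65535, so 0xffff < 0x0001 is
    // false, while the signed i16 form treats 0xffff as -1 and evaluates
    // the same relation as true.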

    Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U16

    Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U16

    Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16()
    {
    } // ~Inst_VOPC__V_CMP_LE_U16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U16

    Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16()
    {
    } // ~Inst_VOPC__V_CMP_GT_U16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U16

    Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16()
    {
    } // ~Inst_VOPC__V_CMP_NE_U16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U16

    Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16()
    {
    } // ~Inst_VOPC__V_CMP_GE_U16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U16

    Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16()
    {
    } // ~Inst_VOPC__V_CMP_T_U16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I16

    Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16()
    {
    } // ~Inst_VOPC__V_CMPX_F_I16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I16

    Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
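
    // As with the f64 forms, the integer cmpx compares commit the result
    // mask to both VCC and EXEC. A typical divergence idiom in GCN3
    // assembly looks like the following sketch (illustrative only; the
    // register choices are hypothetical):
    //   s_mov_b64     s[2:3], exec    // save the live mask
    //   v_cmpx_lt_i16 vcc, v0, v1     // EXEC <- lanes with v0 < v1
    //   ...                           // vector code for the taken lanes
    //   s_mov_b64     exec, s[2:3]    // restore the live mask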

    Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I16

    Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I16

    Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I16

    Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I16

    Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I16

    Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I16

    Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16()
    {
    } // ~Inst_VOPC__V_CMPX_T_I16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_U16

    Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16()
    {
    } // ~Inst_VOPC__V_CMPX_F_U16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U16

    Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U16

    Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U16

    Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U16

    Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U16

    Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U16

    Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U16

    Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16()
    {
    } // ~Inst_VOPC__V_CMPX_T_U16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I32

    Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32()
    {
    } // ~Inst_VOPC__V_CMP_F_I32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I32

    Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32()
    {
    } // ~Inst_VOPC__V_CMP_LT_I32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I32

    Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I32

    Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32()
    {
    } // ~Inst_VOPC__V_CMP_LE_I32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I32

    Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32()
    {
    } // ~Inst_VOPC__V_CMP_GT_I32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I32

    Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32()
    {
    } // ~Inst_VOPC__V_CMP_NE_I32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I32

    Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32()
    {
    } // ~Inst_VOPC__V_CMP_GE_I32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I32

    Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32()
    {
    } // ~Inst_VOPC__V_CMP_T_I32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U32

    Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32()
    {
    } // ~Inst_VOPC__V_CMP_F_U32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U32

    Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32()
    {
    } // ~Inst_VOPC__V_CMP_LT_U32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U32

    Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U32

    Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32()
    {
    } // ~Inst_VOPC__V_CMP_LE_U32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U32

    Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32()
    {
    } // ~Inst_VOPC__V_CMP_GT_U32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U32

    Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32()
    {
    } // ~Inst_VOPC__V_CMP_NE_U32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U32

    Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32()
    {
    } // ~Inst_VOPC__V_CMP_GE_U32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U32

    Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32()
    {
    } // ~Inst_VOPC__V_CMP_T_U32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I32

    Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32()
    {
    } // ~Inst_VOPC__V_CMPX_F_I32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I32

    Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I32

    Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I32

    Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I32

    Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I32

    Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I32

    Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I32

    Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32()
    {
    } // ~Inst_VOPC__V_CMPX_T_I32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
14221 Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC
*iFmt
)
14222 : Inst_VOPC(iFmt
, "v_cmpx_f_u32")
14225 } // Inst_VOPC__V_CMPX_F_U32
14227 Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32()
14229 } // ~Inst_VOPC__V_CMPX_F_U32
14231 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
14233 Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst
)
14235 Wavefront
*wf
= gpuDynInst
->wavefront();
14236 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14238 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14239 if (wf
->execMask(lane
)) {
14240 vcc
.setBit(lane
, 0);
14244 wf
->execMask() = vcc
.rawData();
    Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U32

    Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U32

    Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U32

    Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U32

    Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U32

    Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U32

    Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U32

    Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32()
    {
    } // ~Inst_VOPC__V_CMPX_T_U32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
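
    // The 64-bit comparisons below follow the same pattern as their 32-bit
    // counterparts; in the GCN3 register model a ConstVecOperandI64/U64
    // element is assembled from an aligned pair of 32-bit VGPRs per lane,
    // so only the operand type and the comparison operator change.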
    Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I64

    Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64()
    {
    } // ~Inst_VOPC__V_CMP_F_I64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I64

    Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64()
    {
    } // ~Inst_VOPC__V_CMP_LT_I64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I64

    Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I64

    Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64()
    {
    } // ~Inst_VOPC__V_CMP_LE_I64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I64

    Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64()
    {
    } // ~Inst_VOPC__V_CMP_GT_I64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I64

    Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64()
    {
    } // ~Inst_VOPC__V_CMP_NE_I64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I64

    Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64()
    {
    } // ~Inst_VOPC__V_CMP_GE_I64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I64

    Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64()
    {
    } // ~Inst_VOPC__V_CMP_T_I64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute
    Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U64

    Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64()
    {
    } // ~Inst_VOPC__V_CMP_F_U64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U64

    Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64()
    {
    } // ~Inst_VOPC__V_CMP_LT_U64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U64

    Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U64

    Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64()
    {
    } // ~Inst_VOPC__V_CMP_LE_U64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U64

    Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64()
    {
    } // ~Inst_VOPC__V_CMP_GT_U64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U64

    Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64()
    {
    } // ~Inst_VOPC__V_CMP_NE_U64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U64

    Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64()
    {
    } // ~Inst_VOPC__V_CMP_GE_U64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U64

    Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64()
    {
    } // ~Inst_VOPC__V_CMP_T_U64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute
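
    // The _u64 family above differs from the _i64 family only in that the
    // operands are interpreted as unsigned; the model gets this for free
    // from the C++ comparison on ConstVecOperandU64's unsigned elements.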
    Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I64

    Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64()
    {
    } // ~Inst_VOPC__V_CMPX_F_I64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I64

    Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I64

    Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I64

    Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I64

    Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I64

    Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I64

    Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I64

    Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64()
    {
    } // ~Inst_VOPC__V_CMPX_T_I64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
    Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_U64

    Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64()
    {
    } // ~Inst_VOPC__V_CMPX_F_U64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U64

    Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U64

    Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U64

    Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U64

    Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U64

    Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U64

    Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U64

    Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64()
    {
    } // ~Inst_VOPC__V_CMPX_T_U64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
    Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
          InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p1_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P1_F32

    Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P1_F32

    // D.f = P10 * S.f + P0; parameter interpolation
    void
    Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
          InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p2_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P2_F32

    Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P2_F32

    // D.f = P20 * S.f + D.f; parameter interpolation
    void
    Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
          InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_mov_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_MOV_F32

    Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_MOV_F32

    void
    Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
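
    // The VINTRP parameter-interpolation instructions above are only
    // meaningful for graphics shaders; this model targets GPU compute
    // kernels, so encountering one of them aborts the simulation through
    // panicUnimplemented().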
    Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_CLASS_F32

    Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F32

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    } // execute
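
    // Example: with S1.u = 0x3 (bits 0 and 1 set) the class function reports
    // true exactly when S0 is any NaN; with S1.u = 0x204 (bits 2 and 9) it
    // reports true exactly when S0 is +/- infinity. Note the model does not
    // distinguish signaling from quiet NaNs: bits 0 and 1 are tested together
    // with a single std::isnan() check.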
    Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_CLASS_F32

    Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F32

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_CLASS_F64

    Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F64

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    } // execute
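
    // The f64 variant above relies on the host's double-precision
    // classification (std::fpclassify and friends applied to a 64-bit
    // ConstVecOperandF64 element); the logic is otherwise identical to the
    // f32 variant.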
    Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_CLASS_F64

    Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F64

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_CLASS_F16

    Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F16

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_CLASS_F16

    Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F16

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
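
    // Most of the f16 comparison variants below are unimplemented: the model
    // has no half-precision arithmetic support, so they panic if executed.
    // The exceptions are the variants whose result does not depend on the
    // operand values (v_cmp_tru_f16, v_cmpx_f_f16, v_cmpx_tru_f16), which
    // can be modeled exactly with mask writes alone.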
    Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_F_F16

    Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
    {
    } // ~Inst_VOP3__V_CMP_F_F16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LT_F16

    Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
    {
    } // ~Inst_VOP3__V_CMP_LT_F16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_EQ_F16

    Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LE_F16

    Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
    {
    } // ~Inst_VOP3__V_CMP_LE_F16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GT_F16

    Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
    {
    } // ~Inst_VOP3__V_CMP_GT_F16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LG_F16

    Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
    {
    } // ~Inst_VOP3__V_CMP_LG_F16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GE_F16

    Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
    {
    } // ~Inst_VOP3__V_CMP_GE_F16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_O_F16

    Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16()
    {
    } // ~Inst_VOP3__V_CMP_O_F16

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_u_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_U_F16

    Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16()
    {
    } // ~Inst_VOP3__V_CMP_U_F16

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NGE_F16

    Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F16

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLG_F16

    Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F16

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NGT_F16

    Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F16

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLE_F16

    Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F16

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NEQ_F16

    Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F16

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLT_F16

    Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F16

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_TRU_F16

    Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_f16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_F16

    Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16()
    {
    } // ~Inst_VOP3__V_CMPX_F_F16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LT_F16

    Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_F16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_EQ_F16

    Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_F16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LE_F16

    Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GT_F16

    Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LG_F16

    Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GE_F16

    Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_O_F16

    Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16()
    {
    } // ~Inst_VOP3__V_CMPX_O_F16

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_U_F16

    Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16()
    {
    } // ~Inst_VOP3__V_CMPX_U_F16

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGE_F16

    Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F16

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLG_F16

    Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F16

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGT_F16

    Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F16

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLE_F16

    Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F16

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NEQ_F16

    Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F16

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLT_F16

    Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F16

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_TRU_F16

    Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_F_F32

    Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32()
    {
    } // ~Inst_VOP3__V_CMP_F_F32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LT_F32

    Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32()
    {
    } // ~Inst_VOP3__V_CMP_LT_F32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_EQ_F32

    Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LE_F32

    Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32()
    {
    } // ~Inst_VOP3__V_CMP_LE_F32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_GT_F32

    Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32()
    {
    } // ~Inst_VOP3__V_CMP_GT_F32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LG_F32

    Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32()
    {
    } // ~Inst_VOP3__V_CMP_LG_F32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_GE_F32

    Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32()
    {
    } // ~Inst_VOP3__V_CMP_GE_F32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
16874 Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3
*iFmt
)
16875 : Inst_VOP3(iFmt
, "v_cmp_o_f32", true)
16879 } // Inst_VOP3__V_CMP_O_F32
16881 Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32()
16883 } // ~Inst_VOP3__V_CMP_O_F32
16885 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
16887 Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst
)
16889 Wavefront
*wf
= gpuDynInst
->wavefront();
16890 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
16891 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
16892 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
16897 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
16898 if (wf
->execMask(lane
)) {
16899 sdst
.setBit(lane
, (!std::isnan(src0
[lane
])
16900 && !std::isnan(src1
[lane
])) ? 1 : 0);
16907 Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3
*iFmt
)
16908 : Inst_VOP3(iFmt
, "v_cmp_u_f32", true)
16912 } // Inst_VOP3__V_CMP_U_F32
16914 Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32()
16916 } // ~Inst_VOP3__V_CMP_U_F32
16918 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
16920 Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst
)
16922 Wavefront
*wf
= gpuDynInst
->wavefront();
16923 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
16924 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
16925 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
16930 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
16931 if (wf
->execMask(lane
)) {
16932 sdst
.setBit(lane
, (std::isnan(src0
[lane
])
16933 || std::isnan(src1
[lane
])) ? 1 : 0);
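    /**
     * V_CMP_O (ordered) and V_CMP_U (unordered) test only for NaNs: O
     * yields 1 iff neither source is NaN, U yields 1 iff either source is
     * NaN, i.e. per lane !std::isnan(a) && !std::isnan(b) and its
     * complement, exactly as the two loops above compute.
     */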
    Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NGE_F32

    Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F32

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLG_F32

    Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F32

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
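    /**
     * The N* compares are written as negations of the corresponding
     * positive test, so unordered inputs flip the answer: any comparison
     * against NaN is false, hence its negation is true. Per lane, for
     * example:
     *
     *     NGE: !(1.0f >= NAN)              -> 1
     *     NLG: !(NAN < 2.0f || NAN > 2.0f) -> 1
     *
     * which matches the ISA's "not greater than or equal", etc.,
     * including the unordered case.
     */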
    Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NGT_F32

    Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F32

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLE_F32

    Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F32

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NEQ_F32

    Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F32

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLT_F32

    Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F32

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_TRU_F32

    Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_F_F32

    Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32()
    {
    } // ~Inst_VOP3__V_CMPX_F_F32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LT_F32

    Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_F32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_EQ_F32

    Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_F32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LE_F32

    Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_GT_F32

    Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LG_F32

    Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_GE_F32

    Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_O_F32

    Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32()
    {
    } // ~Inst_VOP3__V_CMPX_O_F32

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_U_F32

    Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32()
    {
    } // ~Inst_VOP3__V_CMPX_U_F32

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NGE_F32

    Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F32

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLG_F32

    Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F32

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NGT_F32

    Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F32

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLE_F32

    Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F32

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NEQ_F32

    Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F32

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLT_F32

    Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F32

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_TRU_F32

    Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_F_F64

    Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64()
    {
    } // ~Inst_VOP3__V_CMP_F_F64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LT_F64

    Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64()
    {
    } // ~Inst_VOP3__V_CMP_LT_F64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
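    /**
     * The 64-bit compares honor the VOP3 input modifiers: ABS bit n takes
     * the absolute value of source n and NEG bit n negates it, with NEG
     * applied after ABS, as the order of the blocks above shows. Bit 0x4
     * would address a third source, which compares do not have, hence the
     * asserts. A minimal scalar sketch of the same decode (field names as
     * used above):
     *
     *     double s0 = rawSrc0;
     *     if (instData.ABS & 0x1) s0 = std::fabs(s0);
     *     if (extData.NEG & 0x1) s0 = -s0;
     *     // likewise for s1 with the 0x2 bits
     */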
    Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_EQ_F64

    Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LE_F64

    Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64()
    {
    } // ~Inst_VOP3__V_CMP_LE_F64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_GT_F64

    Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64()
    {
    } // ~Inst_VOP3__V_CMP_GT_F64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LG_F64

    Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64()
    {
    } // ~Inst_VOP3__V_CMP_LG_F64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_GE_F64

    Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64()
    {
    } // ~Inst_VOP3__V_CMP_GE_F64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_o_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_O_F64

    Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64()
    {
    } // ~Inst_VOP3__V_CMP_O_F64

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_u_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_U_F64

    Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64()
    {
    } // ~Inst_VOP3__V_CMP_U_F64

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NGE_F64

    Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F64

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLG_F64

    Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F64

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NGT_F64

    Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F64

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLE_F64

    Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F64

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NEQ_F64

    Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F64

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLT_F64

    Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F64

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_TRU_F64

    Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_F_F64

    Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64()
    {
    } // ~Inst_VOP3__V_CMPX_F_F64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_LT_F64

    Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64()
    {
    } // ~Inst_VOP3__V_CMPX_LT_F64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_EQ_F64

    Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_F64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_LE_F64

    Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_GT_F64

    Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_LG_F64

    Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_GE_F64

    Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_O_F64

    Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64()
    {
    } // ~Inst_VOP3__V_CMPX_O_F64

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_U_F64

    Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64()
    {
    } // ~Inst_VOP3__V_CMPX_U_F64

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NGE_F64

    Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F64

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NLG_F64

    Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F64

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NGT_F64

    Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F64

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NLE_F64

    Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F64

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NEQ_F64

    Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F64

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NLT_F64

    Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F64

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
19353 Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64(
19355 : Inst_VOP3(iFmt
, "v_cmpx_tru_f64", true)
19359 } // Inst_VOP3__V_CMPX_TRU_F64
19361 Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64()
19363 } // ~Inst_VOP3__V_CMPX_TRU_F64
19365 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
19367 Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst
)
19369 Wavefront
*wf
= gpuDynInst
->wavefront();
19370 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19372 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19373 if (wf
->execMask(lane
)) {
19374 sdst
.setBit(lane
, 1);
19378 wf
->execMask() = sdst
.rawData();
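    /**
     * Every CMPX variant ends by folding the packed compare result back
     * into the wavefront's EXEC mask (wf->execMask() = sdst.rawData()).
     * Because sdst starts out clear and setBit() runs only for currently
     * active lanes, a CMPX can keep or clear EXEC bits but never re-enable
     * an inactive lane; EXEC shrinks monotonically until it is explicitly
     * restored. Equivalent mask arithmetic, sketched with a plain integer
     * (illustrative only, not this model's API):
     *
     *     uint64_t newExec = 0;
     *     for (int lane = 0; lane < 64; ++lane) {
     *         if (((exec >> lane) & 1) && cmpResult(lane)) {
     *             newExec |= 1ULL << lane;
     *         }
     *     }
     *     exec = newExec;
     */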
    Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I16

    Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16()
    {
    } // ~Inst_VOP3__V_CMP_F_I16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
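    /**
     * Compare results are packed one bit per lane into the 64-bit scalar
     * destination (an SGPR pair, or VCC in the VOPC encoding). Each call
     * sdst.setBit(lane, x) amounts to the following update of the raw
     * mask value (illustrative only):
     *
     *     uint64_t mask = sdst.rawData();
     *     mask = (mask & ~(1ULL << lane)) | (uint64_t(x) << lane);
     */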
    Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I16

    Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16()
    {
    } // ~Inst_VOP3__V_CMP_LT_I16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
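    /**
     * The VOP3 ABS and NEG fields are three-bit masks with one bit per
     * source operand: bit 0 gates SRC0, bit 1 gates SRC1, and bit 2 gates
     * SRC2. Integer compares accept no input modifiers at all, hence the
     * six asserts above; the FP compares assert only bit 2, since a
     * two-source compare has no SRC2 to modify. Decoding sketch
     * (illustrative only):
     *
     *     bool absSrc0 = instData.ABS & 0x1;
     *     bool absSrc1 = instData.ABS & 0x2;
     *     bool absSrc2 = instData.ABS & 0x4;   // never set for compares
     */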
    Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I16

    Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I16

    Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16()
    {
    } // ~Inst_VOP3__V_CMP_LE_I16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I16

    Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16()
    {
    } // ~Inst_VOP3__V_CMP_GT_I16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I16

    Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16()
    {
    } // ~Inst_VOP3__V_CMP_NE_I16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I16

    Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16()
    {
    } // ~Inst_VOP3__V_CMP_GE_I16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I16

    Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16()
    {
    } // ~Inst_VOP3__V_CMP_T_I16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U16

    Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16()
    {
    } // ~Inst_VOP3__V_CMP_F_U16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U16

    Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16()
    {
    } // ~Inst_VOP3__V_CMP_LT_U16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
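    /**
     * The U16 compares differ from their I16 counterparts only in how the
     * raw 16-bit lane pattern is interpreted, which changes the ordering
     * for values with the top bit set. With the pattern 0x8000:
     *
     *     int16_t  s = (int16_t)0x8000;    // -32768
     *     uint16_t u = (uint16_t)0x8000;   //  32768
     *     bool sLt = (s < (int16_t)1);     // true
     *     bool uLt = (u < (uint16_t)1);    // false
     *
     * The operand type therefore matters for LT/LE/GT/GE, while EQ and NE
     * are unaffected by signedness.
     */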
    Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U16

    Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U16

    Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16()
    {
    } // ~Inst_VOP3__V_CMP_LE_U16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U16

    Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16()
    {
    } // ~Inst_VOP3__V_CMP_GT_U16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U16

    Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16()
    {
    } // ~Inst_VOP3__V_CMP_NE_U16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U16

    Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16()
    {
    } // ~Inst_VOP3__V_CMP_GE_U16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U16

    Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16()
    {
    } // ~Inst_VOP3__V_CMP_T_U16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I16

    Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16()
    {
    } // ~Inst_VOP3__V_CMPX_F_I16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I16

    Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
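    /**
     * The integer CMPX compares differ from plain V_CMP only in the final
     * EXEC update, which makes them the building block for predicating a
     * divergent if-block: save EXEC, narrow it with the compare, run the
     * guarded code, then restore. Sketched against a simplified wavefront
     * state (hypothetical helpers, not this model's API):
     *
     *     uint64_t saved = exec;    // s_mov_b64 s[0:1], exec
     *     exec &= ltMask;           // v_cmpx_lt_i16: only taken lanes stay on
     *     runThenBlock();           // vector ops now skip untaken lanes
     *     exec = saved;             // s_mov_b64 exec, s[0:1]
     */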
    Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I16

    Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I16

    Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I16

    Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I16

    Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I16

    Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I16

    Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16()
    {
    } // ~Inst_VOP3__V_CMPX_T_I16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U16

    Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16()
    {
    } // ~Inst_VOP3__V_CMPX_F_U16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U16

    Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U16

    Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U16

    Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        // unsigned operand views: the u16 compares must not sign-extend
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_U16

    Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_U16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_U16

    Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_NE_U16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_U16

    Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_U16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U16

    Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16()
    {
    } // ~Inst_VOP3__V_CMPX_T_U16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I32

    Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
    {
    } // ~Inst_VOP3__V_CMP_F_I32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        // plain V_CMP writes only the scalar destination; it must not
        // touch EXEC (only the CMPX variants update the exec mask)
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I32

    Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
    {
    } // ~Inst_VOP3__V_CMP_LT_I32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I32

    Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I32

    Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
    {
    } // ~Inst_VOP3__V_CMP_LE_I32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I32

    Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
    {
    } // ~Inst_VOP3__V_CMP_GT_I32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I32

    Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
    {
    } // ~Inst_VOP3__V_CMP_NE_I32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I32

    Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
    {
    } // ~Inst_VOP3__V_CMP_GE_I32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I32

    Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
    {
    } // ~Inst_VOP3__V_CMP_T_I32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
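    /**
     * The T ("true") and F ("false") compares read no sources; they simply
     * write a constant 1 or 0 into the result bit of every active lane.
     * Since inactive lanes are skipped and the destination starts out
     * clear, v_cmp_t_* effectively materializes a copy of the current EXEC
     * mask into a scalar register pair, a pattern compilers use to seed
     * mask arithmetic for control flow.
     */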
    Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U32

    Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
    {
    } // ~Inst_VOP3__V_CMP_F_U32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U32

    Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
    {
    } // ~Inst_VOP3__V_CMP_LT_U32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U32

    Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U32

    Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32()
    {
    } // ~Inst_VOP3__V_CMP_LE_U32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U32

    Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32()
    {
    } // ~Inst_VOP3__V_CMP_GT_U32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U32

    Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32()
    {
    } // ~Inst_VOP3__V_CMP_NE_U32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U32

    Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32()
    {
    } // ~Inst_VOP3__V_CMP_GE_U32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U32

    Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32()
    {
    } // ~Inst_VOP3__V_CMP_T_U32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I32

    Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32()
    {
    } // ~Inst_VOP3__V_CMPX_F_I32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I32

    Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I32

    Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I32

    Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I32

    Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I32

    Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I32

    Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I32

    Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32()
    {
    } // ~Inst_VOP3__V_CMPX_T_I32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U32

    Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32()
    {
    } // ~Inst_VOP3__V_CMPX_F_U32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U32

    Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U32

    Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U32

    Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_U32

    Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32()
    {
    } // ~Inst_VOP3__V_CMPX_GT_U32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_U32

    Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32()
    {
    } // ~Inst_VOP3__V_CMPX_NE_U32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_U32

    Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32()
    {
    } // ~Inst_VOP3__V_CMPX_GE_U32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U32

    Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32()
    {
    } // ~Inst_VOP3__V_CMPX_T_U32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I64

    Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64()
    {
    } // ~Inst_VOP3__V_CMP_F_I64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
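
    /**
     * Unlike the V_CMPX variants above, the plain V_CMP instructions that
     * follow only deposit the per-lane result bits in the scalar
     * destination (VCC, or whichever SGPR pair VDST names in this VOP3
     * encoding); EXEC is left untouched. The F ("false") and T ("true")
     * comparisons ignore their sources and unconditionally clear or set
     * the active lanes' bits, which makes them convenient for
     * initializing a lane mask.
     */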

    Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I64

    Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64()
    {
    } // ~Inst_VOP3__V_CMP_LT_I64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I64

    Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I64

    Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64()
    {
    } // ~Inst_VOP3__V_CMP_LE_I64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I64

    Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64()
    {
    } // ~Inst_VOP3__V_CMP_GT_I64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I64

    Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64()
    {
    } // ~Inst_VOP3__V_CMP_NE_I64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I64

    Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64()
    {
    } // ~Inst_VOP3__V_CMP_GE_I64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I64

    Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64()
    {
    } // ~Inst_VOP3__V_CMP_T_I64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U64

    Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64()
    {
    } // ~Inst_VOP3__V_CMP_F_U64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U64

    Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64()
    {
    } // ~Inst_VOP3__V_CMP_LT_U64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U64

    Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U64

    Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64()
    {
    } // ~Inst_VOP3__V_CMP_LE_U64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U64

    Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64()
    {
    } // ~Inst_VOP3__V_CMP_GT_U64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U64

    Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64()
    {
    } // ~Inst_VOP3__V_CMP_NE_U64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U64

    Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64()
    {
    } // ~Inst_VOP3__V_CMP_GE_U64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U64

    Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64()
    {
    } // ~Inst_VOP3__V_CMP_T_U64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I64

    Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64()
    {
    } // ~Inst_VOP3__V_CMPX_F_I64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I64

    Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I64

    Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I64

    Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I64

    Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I64

    Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I64

    Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I64

    Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64()
    {
    } // ~Inst_VOP3__V_CMPX_T_I64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U64

    Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64()
    {
    } // ~Inst_VOP3__V_CMPX_F_U64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U64

    Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U64

    Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U64

    Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_U64

    Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64()
    {
    } // ~Inst_VOP3__V_CMPX_GT_U64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_U64

    Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_NE_U64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_U64

    Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_GE_U64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U64

    Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64()
    {
    } // ~Inst_VOP3__V_CMPX_T_U64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
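
    /**
     * V_CMPX_T_U64 asserts that no ABS/NEG input modifiers are set even
     * though it reads no source operands; the checks are harmless here
     * and keep the invariant (modifiers are legal on FP operands only)
     * uniform across the comparison family.
     */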

    Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cndmask_b32", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_CNDMASK_B32

    Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32()
    {
    } // ~Inst_VOP3__V_CNDMASK_B32

    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    void
    Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(vcc.rawData(), lane)
                    ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
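
    /**
     * V_CNDMASK_B32 is the basic if-conversion primitive: a mask produced
     * by a V_CMP picks, per lane, between the two sources. A minimal
     * sketch of the per-lane selection (names are illustrative):
     *
     *     bool c = (mask >> lane) & 1;          // lane's condition bit
     *     vdst[lane] = c ? src1[lane] : src0[lane];
     *
     * In this VOP3 encoding the "VCC" operand is whatever scalar register
     * pair SRC2 names, so a compiler can keep several masks live at once.
     */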

    Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_ADD_F32

    Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32()
    {
    } // ~Inst_VOP3__V_ADD_F32

    // D.f = S0.f + S1.f.
    void
    Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
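
    /**
     * The ABS/NEG handling above is the common VOP3 pattern for FP
     * operations: each bit of instData.ABS / extData.NEG selects one
     * source (bit 0 -> src0, bit 1 -> src1, bit 2 -> src2), and the
     * modifier is applied to the whole operand before the per-lane loop
     * runs, e.g. NEG bit 0 makes this instruction compute -S0.f + S1.f.
     */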

    Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SUB_F32

    Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32()
    {
    } // ~Inst_VOP3__V_SUB_F32

    // D.f = S0.f - S1.f.
    void
    Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SUBREV_F32

    Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32()
    {
    } // ~Inst_VOP3__V_SUBREV_F32

    // D.f = S1.f - S0.f.
    void
    Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_LEGACY_F32

    Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_MUL_LEGACY_F32

    // D.f = S0.f * S1.f
    void
    Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
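
    /**
     * The case analysis above spells out the corner cases by hand: NaN
     * inputs propagate NaN, subnormal inputs are treated as signed zero,
     * zero times infinity yields NaN, and infinity times anything else
     * yields an infinity with the appropriate sign; only the final else
     * falls through to the native multiply. As written, this "legacy"
     * multiply and the plain V_MUL_F32 below share the same special-case
     * handling.
     */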

    Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_F32

    Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
    {
    } // ~Inst_VOP3__V_MUL_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_i32_i24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_I32_I24

    Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24()
    {
    } // ~Inst_VOP3__V_MUL_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0].
    void
    Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
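
    /**
     * sext<24> treats the low 24 bits as a two's-complement value, so an
     * operand whose bit 23 is set multiplies as a negative number. A
     * worked example for one lane:
     *
     *     src0 = 0x00ffffff  ->  sext<24>(...) = -1
     *     src1 = 0x00000002  ->  sext<24>(...) =  2
     *     vdst = -1 * 2 = -2 (0xfffffffe)
     */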

    Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_i32_i24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_I32_I24

    Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP3__V_MUL_HI_I32_I24

    // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
    void
    Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
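
    /**
     * The 24-bit factors are widened to 64 bits before multiplying so the
     * full 48-bit product is formed without overflow; the value written
     * back is bits [63:32] of that product, i.e. the product's high half
     * with its sign extension.
     */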

    Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_u32_u24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_U32_U24

    Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24()
    {
    } // ~Inst_VOP3__V_MUL_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0].
    void
    Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_u32_u24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_U32_U24

    Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP3__V_MUL_HI_U32_U24

    // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
    void
    Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
                VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
                vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
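
    /**
     * Same widening trick as the signed variant above: casting the masked
     * 24-bit factors to VecElemU64 keeps the 48-bit product exact before
     * the shift extracts its upper half. Multiplying the VecElemU32
     * values directly would wrap modulo 2^32 and lose exactly the bits
     * this instruction is meant to return.
     */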

    Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MIN_F32

    Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32()
    {
    } // ~Inst_VOP3__V_MIN_F32

    // D.f = (S0.f < S1.f ? S0.f : S1.f).
    void
    Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
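
    /**
     * std::fmin and std::fmax (used by V_MIN_F32/V_MAX_F32) return the
     * non-NaN operand when exactly one input is NaN, so a single NaN
     * source does not poison the result; only NaN-vs-NaN produces NaN.
     */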

    Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MAX_F32

    Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32()
    {
    } // ~Inst_VOP3__V_MAX_F32

    // D.f = (S0.f >= S1.f ? S0.f : S1.f).
    void
    Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_I32

    Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32()
    {
    } // ~Inst_VOP3__V_MIN_I32

    // D.i = min(S0.i, S1.i).
    void
    Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_I32

    Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32()
    {
    } // ~Inst_VOP3__V_MAX_I32

    // D.i = max(S0.i, S1.i).
    void
    Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_U32

    Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32()
    {
    } // ~Inst_VOP3__V_MIN_U32

    // D.u = min(S0.u, S1.u).
    void
    Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_U32

    Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32()
    {
    } // ~Inst_VOP3__V_MAX_U32

    // D.u = max(S0.u, S1.u).
    void
    Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshrrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B32

    Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32()
    {
    } // ~Inst_VOP3__V_LSHRREV_B32

    // D.u = S1.u >> S0.u[4:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
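
    /**
     * The "rev" in these shift mnemonics means the operand roles are
     * swapped relative to the non-rev forms: S0 supplies the shift count
     * and S1 the value being shifted. The usual reading is that this lets
     * a constant count sit in the SRC0 slot, the only one that can encode
     * an inline constant or literal in the VOP2 form of the opcode.
     * bits(src0[lane], 4, 0) masks the count to five bits, so shifts are
     * always by 0-31.
     */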

    Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I32

    Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32()
    {
    } // ~Inst_VOP3__V_ASHRREV_I32

    // D.i = signext(S1.i) >> S0.i[4:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B32

    Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32()
    {
    } // ~Inst_VOP3__V_LSHLREV_B32

    // D.u = S1.u << S0.u[4:0].
    void
    Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_and_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_AND_B32

    Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32()
    {
    } // ~Inst_VOP3__V_AND_B32

    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] & src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_or_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_OR_B32

    Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32()
    {
    } // ~Inst_VOP3__V_OR_B32

    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] | src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_xor_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_XOR_B32

    Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32()
    {
    } // ~Inst_VOP3__V_XOR_B32

    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mac_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP3__V_MAC_F32

    Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32()
    {
    } // ~Inst_VOP3__V_MAC_F32

    // D.f = S0.f * S1.f + D.f.
    void
    Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vdst.read();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));
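
        // std::fma() computes src0 * src1 + vdst with a single rounding,
        // matching fused multiply-accumulate semantics; vdst is both an
        // input and the destination, hence the vdst.read() above.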
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_add_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_ADD_U32

    Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
    {
    } // ~Inst_VOP3__V_ADD_U32

    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
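
        // Widening each lane's sum to 64 bits lets the carry-out be
        // detected as (sum >= 2^32); the per-lane carries are collected
        // as individual bits of the 64-bit SGPR-pair named by SDST.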
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
                vcc.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_sub_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUB_U32

    Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
    {
    } // ~Inst_VOP3__V_SUB_U32

    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUBREV_U32

    Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
    {
    } // ~Inst_VOP3__V_SUBREV_U32

    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_addc_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_ADDC_U32

    Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
    {
    } // ~Inst_VOP3__V_ADDC_U32

    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
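
        // The carry-in for a lane is bit 'lane' of the SGPR-pair named by
        // SRC2. Accumulating in 64 bits catches the case where src0 + src1
        // alone does not overflow but adding the carry-in does.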
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                sdst.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                    + (VecElemU64)bits(vcc.rawData(), lane))
                    >= 0x100000000 ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subb_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_SUBB_U32

    Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
    {
    } // ~Inst_VOP3__V_SUBB_U32

    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
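
        // Unsigned borrow-out: the subtraction wraps exactly when the
        // subtrahend plus the incoming borrow bit exceeds the minuend.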
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane]
                    - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subbrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_SUBBREV_U32

    Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
    {
    } // ~Inst_VOP3__V_SUBBREV_U32

    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
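
        // Reversed form: SRC1 is the minuend here, so the borrow-out
        // tests whether SRC0 plus the incoming borrow bit exceeds SRC1.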
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane]
                    - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
                    > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_ADD_F16

    Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16()
    {
    } // ~Inst_VOP3__V_ADD_F16

    // D.f16 = S0.f16 + S1.f16.
    void
    Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SUB_F16

    Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16()
    {
    } // ~Inst_VOP3__V_SUB_F16

    // D.f16 = S0.f16 - S1.f16.
    void
    Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SUBREV_F16

    Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16()
    {
    } // ~Inst_VOP3__V_SUBREV_F16

    // D.f16 = S1.f16 - S0.f16.
    void
    Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MUL_F16

    Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
    {
    } // ~Inst_VOP3__V_MUL_F16

    // D.f16 = S0.f16 * S1.f16.
    void
    Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mac_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP3__V_MAC_F16

    Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
    {
    } // ~Inst_VOP3__V_MAC_F16

    // D.f16 = S0.f16 * S1.f16 + D.f16.
    void
    Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ADD_U16

    Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16()
    {
    } // ~Inst_VOP3__V_ADD_U16

    // D.u16 = S0.u16 + S1.u16.
    void
    Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUB_U16

    Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16()
    {
    } // ~Inst_VOP3__V_SUB_U16

    // D.u16 = S0.u16 - S1.u16.
    void
    Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUBREV_U16

    Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
    {
    } // ~Inst_VOP3__V_SUBREV_U16

    // D.u16 = S1.u16 - S0.u16.
    void
    Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_lo_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_LO_U16

    Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
    {
    } // ~Inst_VOP3__V_MUL_LO_U16

    // D.u16 = S0.u16 * S1.u16.
    void
    Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B16

    Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
    {
    } // ~Inst_VOP3__V_LSHLREV_B16

    // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
    void
    Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
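
        // For 16-bit shifts only the low four bits of the SRC0 shift
        // amount are used, keeping the distance within the 16-bit lane.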
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshrrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B16

    Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
    {
    } // ~Inst_VOP3__V_LSHRREV_B16

    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I16

    Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
    {
    } // ~Inst_VOP3__V_ASHRREV_I16

    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MAX_F16

    Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
    {
    } // ~Inst_VOP3__V_MAX_F16

    // D.f16 = max(S0.f16, S1.f16).
    void
    Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MIN_F16

    Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
    {
    } // ~Inst_VOP3__V_MIN_F16

    // D.f16 = min(S0.f16, S1.f16).
    void
    Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_U16

    Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
    {
    } // ~Inst_VOP3__V_MAX_U16

    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_I16

    Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
    {
    } // ~Inst_VOP3__V_MAX_I16

    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_U16

    Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
    {
    } // ~Inst_VOP3__V_MIN_U16

    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_I16

    Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
    {
    } // ~Inst_VOP3__V_MIN_I16

    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LDEXP_F16

    Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
    {
    } // ~Inst_VOP3__V_LDEXP_F16

    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_nop", false)
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP3__V_NOP

    Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
    {
    } // ~Inst_VOP3__V_NOP

    // Do nothing.
    void
    Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute
    Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mov_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_B32

    Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
    {
    } // ~Inst_VOP3__V_MOV_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_I32_F64

    Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64()
    {
    } // ~Inst_VOP3__V_CVT_I32_F64

    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }
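
        // std::frexp() extracts the binary exponent of each element: NaN
        // converts to 0, while infinities and values too large for the
        // signed 32-bit range saturate to INT_MIN or INT_MAX according to
        // the sign of the input.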
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_i32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_I32

    Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32()
    {
    } // ~Inst_VOP3__V_CVT_F64_I32

    // D.d = (double)S0.i.
    void
    Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_i32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_I32

    Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32()
    {
    } // ~Inst_VOP3__V_CVT_F32_I32

    // D.f = (float)S0.i.
    void
    Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        VecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_u32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_U32

    Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32()
    {
    } // ~Inst_VOP3__V_CVT_F32_U32

    // D.f = (float)S0.u.
    void
    Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_U32_F32

    Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32()
    {
    } // ~Inst_VOP3__V_CVT_U32_F32

    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }
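
        // Unsigned saturation: NaN and negative infinity convert to 0,
        // while positive infinity and values whose binary exponent exceeds
        // 31 clamp to UINT_MAX.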
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_I32_F32

    Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_I32_F32

    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mov_fed_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_FED_B32

    Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
    {
    } // ~Inst_VOP3__V_MOV_FED_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_F32

    Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_F16_F32

    // D.f16 = flt32_to_flt16(S0.f).
    void
    Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F32_F16

    Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
    {
    } // ~Inst_VOP3__V_CVT_F32_F16

    // D.f = flt16_to_flt32(S0.f16).
    void
    Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_rpi_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_RPI_I32_F32

    Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_RPI_I32_F32

    // D.i = (int)floor(S0.f + 0.5).
    void
    Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_flr_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_FLR_I32_F32

    Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_FLR_I32_F32

    // D.i = (int)floor(S0.f).
    void
    Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_off_f32_i4", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_OFF_F32_I4

    Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP3__V_CVT_OFF_F32_I4

    // 4-bit signed int to 32-bit float.
    void
    Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F32_F64

    Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
    {
    } // ~Inst_VOP3__V_CVT_F32_F64

    // D.f = (float)S0.d.
    void
    Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_f32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_F32

    Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
    {
    } // ~Inst_VOP3__V_CVT_F64_F32

    // D.d = (double)S0.f.
    void
    Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte0", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE0

    Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE0

    // D.f = (float)(S0.u[7:0]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }
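
        // The four UBYTE variants differ only in which byte of the 32-bit
        // source they extract; bits(src[lane], 7, 0) selects the least
        // significant byte before the float conversion.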
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte1", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE1

    Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE1

    // D.f = (float)(S0.u[15:8]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte2", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE2

    Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE2

    // D.f = (float)(S0.u[23:16]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte3", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE3

    Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE3

    // D.f = (float)(S0.u[31:24]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_U32_F64

    Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64()
    {
    } // ~Inst_VOP3__V_CVT_U32_F64

    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_u32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_U32

    Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32()
    {
    } // ~Inst_VOP3__V_CVT_F64_U32

    // D.d = (double)S0.u.
    void
    Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRUNC_F64

    Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64()
    {
    } // ~Inst_VOP3__V_TRUNC_F64

    // D.d = trunc(S0.d), return integer part of S0.d.
    void
    Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CEIL_F64

    Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64()
    {
    } // ~Inst_VOP3__V_CEIL_F64

    // D.d = ceil(S0.d);
    void
    Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RNDNE_F64

    Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64()
    {
    } // ~Inst_VOP3__V_RNDNE_F64

    // D.d = round_nearest_even(S0.d).
    void
    Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }
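
        // roundNearestEven() implements IEEE round-half-to-even, which
        // differs from std::round()'s round-half-away-from-zero behavior
        // on exact .5 ties.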
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FLOOR_F64

    Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64()
    {
    } // ~Inst_VOP3__V_FLOOR_F64

    // D.d = floor(S0.d);
    void
    Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FRACT_F32

    Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32()
    {
    } // ~Inst_VOP3__V_FRACT_F32

    // D.f = modf(S0.f).
    void
    Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }
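
        // std::modf() splits the value into integral and fractional
        // parts; only the fractional part is kept as the result.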
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_TRUNC_F32

    Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32()
    {
    } // ~Inst_VOP3__V_TRUNC_F32

    // D.f = trunc(S0.f), return integer part of S0.f.
    void
    Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CEIL_F32

    Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32()
    {
    } // ~Inst_VOP3__V_CEIL_F32

    // D.f = ceil(S0.f);
    void
    Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RNDNE_F32

    Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32()
    {
    } // ~Inst_VOP3__V_RNDNE_F32

    // D.f = round_nearest_even(S0.f).
    void
    Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FLOOR_F32

    Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32()
    {
    } // ~Inst_VOP3__V_FLOOR_F32

    // D.f = floor(S0.f);
    void
    Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_EXP_F32

    Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32()
    {
    } // ~Inst_VOP3__V_EXP_F32

    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LOG_F32

    Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32()
    {
    } // ~Inst_VOP3__V_LOG_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RCP_F32

    Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32()
    {
    } // ~Inst_VOP3__V_RCP_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_iflag_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RCP_IFLAG_F32

    Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP3__V_RCP_IFLAG_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RSQ_F32

    Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32()
    {
    } // ~Inst_VOP3__V_RSQ_F32

    // D.f = 1.0 / sqrt(S0.f).
    void
    Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RCP_F64

    Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64()
    {
    } // ~Inst_VOP3__V_RCP_F64

    // D.d = 1.0 / S0.d.
    void
    Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }
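
        // Reciprocal special cases: a zero input produces +INFINITY, NaN
        // propagates, and either infinity collapses to a zero that keeps
        // the input's sign; all other values take the 1.0 / x path.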
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RSQ_F64

    Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64()
    {
    } // ~Inst_VOP3__V_RSQ_F64

    // D.d = 1.0 / sqrt(S0.d).
    void
    Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }
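
        // Reciprocal square root special cases: zero gives +INFINITY,
        // +INFINITY gives 0, and NaN or any negative input yields NaN.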
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])
                           && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sqrt_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SQRT_F32

    Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32()
    {
    } // ~Inst_VOP3__V_SQRT_F32

    // D.f = sqrt(S0.f).
    void
    Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sqrt_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_SQRT_F64

    Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64()
    {
    } // ~Inst_VOP3__V_SQRT_F64

    // D.d = sqrt(S0.d).
    void
    Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sin_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SIN_F32

    Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32()
    {
    } // ~Inst_VOP3__V_SIN_F32

    // D.f = sin(S0.f * 2 * PI).
    void
    Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sin(src[lane] * 2 * pi.rawData());
            }
        }

        vdst.write();
    } // execute
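
    // Worked example: V_SIN_F32 takes its input in revolutions rather than
    // radians, so src = 0.25 computes sin(0.25 * 2 * PI) = sin(PI / 2) = 1.0.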

    Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cos_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_COS_F32

    Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32()
    {
    } // ~Inst_VOP3__V_COS_F32

    // D.f = cos(S0.f * 2 * PI).
    void
    Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_not_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_NOT_B32

    Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32()
    {
    } // ~Inst_VOP3__V_NOT_B32

    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ~src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFREV_B32

    Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32()
    {
    } // ~Inst_VOP3__V_BFREV_B32

    // D.u[31:0] = S0.u[0:31], bitfield reverse.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = reverseBits(src[lane]);
            }
        }

        vdst.write();
    } // execute
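
    // Worked example: reverseBits maps bit 0 to bit 31, bit 1 to bit 30, and
    // so on, e.g. reverseBits(0x00000001) == 0x80000000 and
    // reverseBits(0x0000ff00) == 0x00ff0000.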

    Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbh_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_U32

    Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32()
    {
    } // ~Inst_VOP3__V_FFBH_U32

    // D.u = position of first 1 in S0.u from MSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOneMsb(src[lane]);
            }
        }

        vdst.write();
    } // execute
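
    // Worked example: the result counts from the MSB, so
    // findFirstOneMsb(0x00800000) == 8 (eight leading zeros before the first
    // set bit) and findFirstOneMsb(0) == 0xffffffff.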

    Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbl_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBL_B32

    Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32()
    {
    } // ~Inst_VOP3__V_FFBL_B32

    // D.u = position of first 1 in S0.u from LSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOne(src[lane]);
            }
        }

        vdst.write();
    } // execute
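
    // Worked example: the result counts from the LSB, so
    // findFirstOne(0x00010000) == 16 and findFirstOne(0) == 0xffffffff.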

    Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbh_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_I32

    Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32()
    {
    } // ~Inst_VOP3__V_FFBH_I32

    // D.u = position of first bit different from sign bit in S0.i from MSB;
    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
    void
    Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = firstOppositeSignBit(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_EXP_I32_F64

    Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I32_F64

    // See V_FREXP_EXP_I32_F32.
    void
    Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_MANT_F64

    Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F64

    void
    Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 exp(0);
                vdst[lane] = std::frexp(src[lane], &exp);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FRACT_F64

    Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64()
    {
    } // ~Inst_VOP3__V_FRACT_F64

    void
    Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                // use a double-precision integer part so the F64 source is
                // not silently narrowed to float by overload resolution
                VecElemF64 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FREXP_EXP_I32_F32

    Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I32_F32

    // frexp(S0.f, Exponent(S0.f))
    // if (S0.f == INF || S0.f == NAN) then D.i = 0;
    // else D.i = Exponent(S0.f)
    void
    Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FREXP_MANT_F32

    Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F32

    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
    // else D.f = Mantissa(S0.f).
    void
    Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_clrexcp", false)
    {
    } // Inst_VOP3__V_CLREXCP

    Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP()
    {
    } // ~Inst_VOP3__V_CLREXCP

    void
    Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_u16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_U16

    Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16()
    {
    } // ~Inst_VOP3__V_CVT_F16_U16

    // D.f16 = uint16_to_flt16(S.u16).
    void
    Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_i16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_I16

    Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16()
    {
    } // ~Inst_VOP3__V_CVT_F16_I16

    // D.f16 = int16_to_flt16(S.i16).
    void
    Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_U16_F16

    Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16()
    {
    } // ~Inst_VOP3__V_CVT_U16_F16

    // D.u16 = flt16_to_uint16(S.f16).
    void
    Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_I16_F16

    Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16()
    {
    } // ~Inst_VOP3__V_CVT_I16_F16

    // D.i16 = flt16_to_int16(S.f16).
    void
    Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RCP_F16

    Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16()
    {
    } // ~Inst_VOP3__V_RCP_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / S0.f16.
    void
    Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sqrt_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SQRT_F16

    Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16()
    {
    } // ~Inst_VOP3__V_SQRT_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = sqrt(S0.f16).
    void
    Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RSQ_F16

    Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16()
    {
    } // ~Inst_VOP3__V_RSQ_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / sqrt(S0.f16).
    void
    Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LOG_F16

    Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
    {
    } // ~Inst_VOP3__V_LOG_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 0.0f;
    // else
    //     D.f16 = log2(S0.f16).
    void
    Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_EXP_F16

    Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
    {
    } // ~Inst_VOP3__V_EXP_F16

    // if (S0.f16 == 0.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = pow(2.0, S0.f16).
    void
    Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_MANT_F16

    Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F16

    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.f16 = S0.f16;
    // else
    //     D.f16 = mantissa(S0.f16).
    void
    Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_EXP_I16_F16

    Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I16_F16

    void
    Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FLOOR_F16

    Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
    {
    } // ~Inst_VOP3__V_FLOOR_F16

    // D.f16 = floor(S0.f16);
    void
    Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CEIL_F16

    Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
    {
    } // ~Inst_VOP3__V_CEIL_F16

    // D.f16 = ceil(S0.f16);
    void
    Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_TRUNC_F16

    Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
    {
    } // ~Inst_VOP3__V_TRUNC_F16

    // D.f16 = trunc(S0.f16).
    void
    Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RNDNE_F16

    Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
    {
    } // ~Inst_VOP3__V_RNDNE_F16

    // D.f16 = roundNearestEven(S0.f16);
    void
    Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FRACT_F16

    Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
    {
    } // ~Inst_VOP3__V_FRACT_F16

    // D.f16 = S0.f16 + -floor(S0.f16).
    void
    Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sin_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SIN_F16

    Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
    {
    } // ~Inst_VOP3__V_SIN_F16

    // D.f16 = sin(S0.f16 * 2 * PI).
    void
    Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cos_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_COS_F16

    Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
    {
    } // ~Inst_VOP3__V_COS_F16

    // D.f16 = cos(S0.f16 * 2 * PI).
    void
    Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_EXP_LEGACY_F32

    Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_EXP_LEGACY_F32

    // D.f = pow(2.0, S0.f)
    void
    Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LOG_LEGACY_F32

    Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_LOG_LEGACY_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_LEGACY_F32

    Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_MAD_LEGACY_F32

    // D.f = S0.f * S1.f + S2.f
    void
    Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_F32

    Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32()
    {
    } // ~Inst_VOP3__V_MAD_F32

    // D.f = S0.f * S1.f + S2.f.
    void
    Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_i32_i24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I32_I24

    Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
    {
    } // ~Inst_VOP3__V_MAD_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
    void
    Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute
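
    // Worked example: the 24-bit operands are sign extended before the
    // multiply, so a lane with src0 = 0xffffff (-1 as a 24-bit value),
    // src1 = 5 and src2 = 10 computes (-1) * 5 + 10 = 5.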

    Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u32_u24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U32_U24

    Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
    {
    } // ~Inst_VOP3__V_MAD_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
    void
    Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
                    + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubeid_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEID_F32

    Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
    {
    } // ~Inst_VOP3__V_CUBEID_F32

    void
    Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubesc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBESC_F32

    Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
    {
    } // ~Inst_VOP3__V_CUBESC_F32

    void
    Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubetc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBETC_F32

    Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
    {
    } // ~Inst_VOP3__V_CUBETC_F32

    void
    Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubema_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEMA_F32

    Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
    {
    } // ~Inst_VOP3__V_CUBEMA_F32

    void
    Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_U32

    Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
    {
    } // ~Inst_VOP3__V_BFE_U32

    // D.u = (S0.u >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    } // execute
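
    // Worked example: for src0 = 0x12345678, src1 (field_offset) = 8 and
    // src2 (field_width) = 8, the extracted field is
    // (0x12345678 >> 8) & 0xff == 0x56.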

    Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_I32

    Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
    {
    } // ~Inst_VOP3__V_BFE_I32

    // D.i = (S0.i >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfi_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFI_B32

    Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
    {
    } // ~Inst_VOP3__V_BFI_B32

    // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
    void
    Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
                    & src2[lane]);
            }
        }

        vdst.write();
    } // execute
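
    // Worked example: S0 acts as the select mask, taking bits from S1 where
    // the mask is 1 and from S2 elsewhere, so src0 = 0x0000ffff,
    // src1 = 0xaaaaaaaa, src2 = 0x55555555 gives 0x5555aaaa.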

    Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F32

    Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
    {
    } // ~Inst_VOP3__V_FMA_F32

    // D.f = S0.f * S1.f + S2.f.
    void
    Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F64

    Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
    {
    } // ~Inst_VOP3__V_FMA_F64

    // D.d = S0.d * S1.d + S2.d.
    void
    Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lerp_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LERP_U8

    Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
    {
    } // ~Inst_VOP3__V_LERP_U8

    // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24;
    // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
    // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
    // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
    void
    Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ((bits(src0[lane], 31, 24)
                    + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
                    << 24;
                vdst[lane] += ((bits(src0[lane], 23, 16)
                    + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
                    << 16;
                vdst[lane] += ((bits(src0[lane], 15, 8)
                    + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
                    << 8;
                vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
                    + bits(src2[lane], 0)) >> 1);
            }
        }

        vdst.write();
    } // execute
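
    // Note: each byte of the result is the average of the corresponding
    // bytes of S0 and S1, with the low bit of each byte of S2 acting as a
    // per-byte rounding bit: bytes 7 and 8 average to (7 + 8 + 0) >> 1 == 7
    // when the rounding bit is 0 and to 8 when it is 1.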

    Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbit_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBIT_B32

    Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBIT_B32

    // D.u = ({S0, S1} >> S2.u[4:0]) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
            }
        }

        vdst.write();
    } // execute
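
    // Worked example: this is a funnel shift of the 64-bit value {S0, S1},
    // so src0 = 0x00000001, src1 = 0x00000000, src2 = 4 extracts
    // (0x0000000100000000ULL >> 4) & 0xffffffff == 0x10000000.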

    Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbyte_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBYTE_B32

    Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBYTE_B32

    // D.u = ({S0, S1} >> (8 * S2.u[4:0])) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0)))
                    & 0xffffffff);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MIN3_F32

    Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
    {
    } // ~Inst_VOP3__V_MIN3_F32

    // D.f = min(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
                vdst[lane] = std::fmin(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_I32

    Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
    {
    } // ~Inst_VOP3__V_MIN3_I32

    // D.i = min(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_U32

    Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
    {
    } // ~Inst_VOP3__V_MIN3_U32

    // D.u = min(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MAX3_F32

    Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
    {
    } // ~Inst_VOP3__V_MAX3_F32

    // D.f = max(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
                vdst[lane] = std::fmax(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX3_I32

    Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32()
    {
    } // ~Inst_VOP3__V_MAX3_I32

    // D.i = max(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
                vdst[lane] = std::max(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX3_U32

    Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32()
    {
    } // ~Inst_VOP3__V_MAX3_U32

    // D.u = max(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
                vdst[lane] = std::max(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MED3_F32

    Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32()
    {
    } // ~Inst_VOP3__V_MED3_F32

    // D.f = median(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute
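
    // Note: median selects the middle of the three values, e.g.
    // med3(1.0, 3.0, 2.0) == 2.0; it is equivalent to the min/max
    // composition max(min(a, b), min(max(a, b), c)).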

    Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_I32

    Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
    {
    } // ~Inst_VOP3__V_MED3_I32

    // D.i = median(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_U32

    Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
    {
    } // ~Inst_VOP3__V_MED3_U32

    // D.u = median(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U8

    Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8()
    {
    } // ~Inst_VOP3__V_SAD_U8

    // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) +
    // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u.
    // Sum of absolute differences with accumulation, overflow into upper bits
    // is allowed.
    void
    Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 24)
                    - bits(src1[lane], 31, 24))
                    + std::abs(bits(src0[lane], 23, 16)
                    - bits(src1[lane], 23, 16))
                    + std::abs(bits(src0[lane], 15, 8)
                    - bits(src1[lane], 15, 8))
                    + std::abs(bits(src0[lane], 7, 0)
                    - bits(src1[lane], 7, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute
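
    // Worked example: for src0 = 0x01020304, src1 = 0x04030201 and src2 = 0,
    // the per-byte absolute differences are 3 + 1 + 1 + 3, so the result
    // is 8.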

    Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_hi_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_HI_U8

    Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
    {
    } // ~Inst_VOP3__V_SAD_HI_U8

    // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
    // Sum of absolute differences with accumulation, overflow is lost.
    void
    Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (((bits(src0[lane], 31, 24)
                    - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16)
                    - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8)
                    - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0)
                    - bits(src1[lane], 7, 0))) << 16) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U16

    Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16()
    {
    } // ~Inst_VOP3__V_SAD_U16

    // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0])
    // + S2.u.
    // Word SAD with accumulation.
    void
    Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 16)
                    - bits(src1[lane], 31, 16))
                    + std::abs(bits(src0[lane], 15, 0)
                    - bits(src1[lane], 15, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U32

    Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
    {
    } // ~Inst_VOP3__V_SAD_U32

    // D.u = abs(S0.i - S1.i) + S2.u.
    // Dword SAD with accumulation.
    void
    Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PK_U8_F32

    Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U8_F32

    // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0]))
    // | (S2.u & ~(0xff << (8 * S1.u[1:0]))).
    // Convert floating point value S0 to 8-bit unsigned integer and pack the
    // result into byte S1 of dword S2.
    void
    Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
                    << (8 * bits(src1[lane], 1, 0)))
                    | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
            }
        }

        vdst.write();
    } // execute
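
    // Worked example: src0 = 255.0, src1 = 2 (byte select) and
    // src2 = 0x11223344 replaces byte 2 of src2 with 0xff, producing
    // 0x11ff3344.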

    Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_DIV_FIXUP_F32

    Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F32

    // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
    // s2.f = Numerator.
    void
    Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src1[lane]) == FP_ZERO) {
                    if (std::signbit(src1[lane])) {
                        vdst[lane] = -INFINITY;
                    } else {
                        vdst[lane] = +INFINITY;
                    }
                } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src1[lane])) {
                    if (std::signbit(src1[lane])) {
                        vdst[lane] = -INFINITY;
                    } else {
                        vdst[lane] = +INFINITY;
                    }
                } else {
                    vdst[lane] = src2[lane] / src1[lane];
                }
            }
        }

        vdst.write();
    } // execute

    // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---

    Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_FIXUP_F64

    Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F64

    // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
    // s2.d = Numerator.
    void
    Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int sign_out = std::signbit(src1[lane])
                    ^ std::signbit(src2[lane]);
                int exp1(0);
                int exp2(0);
                std::frexp(src1[lane], &exp1);
                std::frexp(src2[lane], &exp2);

                if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
                    vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
                } else if (std::fpclassify(src1[lane]) == FP_ZERO
                    && std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane]
                        = std::numeric_limits<VecElemF64>::signaling_NaN();
                } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
                    vdst[lane]
                        = std::numeric_limits<VecElemF64>::signaling_NaN();
                } else if (std::fpclassify(src1[lane]) == FP_ZERO
                    || std::isinf(src2[lane])) {
                    vdst[lane] = sign_out ? -INFINITY : +INFINITY;
                } else if (std::isinf(src1[lane])
                    || std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane] = sign_out ? -0.0 : +0.0;
                } else if (exp2 - exp1 < -1075) {
                    vdst[lane] = src0[lane];
                } else if (exp1 == 2047) {
                    vdst[lane] = src0[lane];
                } else {
                    vdst[lane] = sign_out ? -std::fabs(src0[lane])
                        : std::fabs(src0[lane]);
                }
            }
        }

        vdst.write();
    } // execute
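
    // Note on the constants above: 2^-1075 is below half the smallest
    // subnormal double (2^-1074), so an exponent difference under -1075
    // means the quotient is certain to underflow and the pre-computed src0
    // is passed through unchanged; 2047 presumably tests for the maximum
    // double exponent field, i.e. an infinite or NaN denominator.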

    Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(F32);
    } // Inst_VOP3__V_DIV_SCALE_F32

    Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32()
    {
    } // ~Inst_VOP3__V_DIV_SCALE_F32

    // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f =
    // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a
    // numerator and denominator, this opcode will appropriately scale inputs
    // for division to avoid subnormal terms during the Newton-Raphson
    // correction algorithm. This opcode produces a VCC flag for post-scale
    // of the quotient.
    void
    Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane];
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
        vdst.write();
    } // execute
    // --- Inst_VOP3__V_DIV_SCALE_F64 class methods ---

    Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f64")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_SCALE_F64

    Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64()
    {
    } // ~Inst_VOP3__V_DIV_SCALE_F64

    // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d =
    // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a
    // numerator and denominator, this opcode will appropriately scale inputs
    // for division to avoid subnormal terms during Newton-Raphson correction
    // algorithm. This opcode produces a VCC flag for post-scale of quotient.
    void
    Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp1 = 0;
                int exp2 = 0;
                std::frexp(src1[lane], &exp1);
                std::frexp(src2[lane], &exp2);
                vcc.setBit(lane, 0);

                if (std::fpclassify(src1[lane]) == FP_ZERO
                    || std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane] = NAN;
                } else if (exp2 - exp1 >= 768) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src1[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
                    vdst[lane] = std::ldexp(src0[lane], 128);
                } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
                           && std::fpclassify(src2[lane] / src1[lane])
                           == FP_SUBNORMAL) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src1[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (std::fpclassify(1.0 / src1[lane])
                           == FP_SUBNORMAL) {
                    vdst[lane] = std::ldexp(src0[lane], -128);
                } else if (std::fpclassify(src2[lane] / src1[lane])
                           == FP_SUBNORMAL) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src2[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (exp2 <= 53) {
                    vdst[lane] = std::ldexp(src0[lane], 128);
                }
            }
        }

        vcc.write();
        vdst.write();
    } // execute
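    // Illustrative note (added commentary): the pre-scale triggers when the
    // operand exponents are far enough apart that Newton-Raphson iteration
    // would produce subnormal intermediates. E.g., src1 = 1.0 (frexp
    // exponent 1) and src2 = std::ldexp(1.0, 800) (frexp exponent 801) give
    // exp2 - exp1 = 800 >= 768, so the lane's VCC bit is set and, when
    // src0 == src1, src0 is scaled by 2**128.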
    Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fmas_f32", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
        setFlag(FMA);
        setFlag(F32);
    } // Inst_VOP3__V_DIV_FMAS_F32

    Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32()
    {
    } // ~Inst_VOP3__V_DIV_FMAS_F32

    // D.f = Special case divide FMA with scale and flags(s0.f = Quotient,
    // s1.f = Denominator, s2.f = Numerator)
    void
    Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        // the original declared a 64-bit destination operand here; a 32-bit
        // FMA writes a 32-bit VGPR, so the operand type is corrected to F32
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_DIV_FMAS_F64 class methods ---

    Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fmas_f64", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
        setFlag(FMA);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_FMAS_F64

    Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
    {
    } // ~Inst_VOP3__V_DIV_FMAS_F64

    // D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
    // s1.d = Denominator, s2.d = Numerator)
    void
    Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        vcc.read();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(vcc.rawData(), lane)) {
                    vdst[lane] = std::pow(2, 64)
                        * std::fma(src0[lane], src1[lane], src2[lane]);
                } else {
                    vdst[lane] = std::fma(src0[lane], src1[lane],
                                          src2[lane]);
                }
            }
        }

        vdst.write();
    } // execute
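    // Illustrative note (added commentary): the VCC bits consumed here are
    // the ones produced by v_div_scale above. When a lane's bit is set, the
    // fused result is post-scaled by 2**64 to undo the earlier pre-scaling;
    // e.g., fma(0.5, 2.0, 0.0) = 1.0 would be written back as 2**64.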
    Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_msad_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MSAD_U8

    Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
    {
    } // ~Inst_VOP3__V_MSAD_U8

    // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_qsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_QSAD_PK_U16_U8

    Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_QSAD_PK_U16_U8

    // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    void
    Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_PK_U16_U8

    Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_PK_U16_U8

    // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    void
    Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_u32_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_U32_U8

    Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_U32_U8

    // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[127:0])
    void
    Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_u64_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U64_U32

    Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
    {
    } // ~Inst_VOP3__V_MAD_U64_U32

    // {vcc_out, D.u64} = S0.u32 * S1.u32 + S2.u64.
    void
    Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    } // execute
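    // Illustrative note (added commentary): muladd() (from inst_util.hh)
    // deposits the 64-bit result of S0 * S1 + S2 into vdst and returns the
    // carry-out, which lands in the lane's VCC bit. E.g., src0 = src1 =
    // 0xffffffff and src2 = 0x2 give vdst = 0xfffffffe00000003 and carry 0.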
    Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_i64_i32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I64_I32

    Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
    {
    } // ~Inst_VOP3__V_MAD_I64_I32

    // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
    void
    Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandI64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_F16

    Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16()
    {
    } // ~Inst_VOP3__V_MAD_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Supports round mode, exception flags, saturation.
    void
    Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u16", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U16

    Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16()
    {
    } // ~Inst_VOP3__V_MAD_U16

    // D.u16 = S0.u16 * S1.u16 + S2.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    void
    Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src2(gpuDynInst, extData.SRC2);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane] + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_i16", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I16

    Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16()
    {
    } // ~Inst_VOP3__V_MAD_I16

    // D.i16 = S0.i16 * S1.i16 + S2.i16.
    // Supports saturation (signed 16-bit integer domain).
    void
    Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI16 src2(gpuDynInst, extData.SRC2);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane] + src2[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_perm_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_PERM_B32

    Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32()
    {
    } // ~Inst_VOP3__V_PERM_B32

    // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]);
    // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]);
    // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]);
    // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]);
    // byte permute(byte in[8], byte sel) {
    //     if(sel>=13) then return 0xff;
    //     elsif(sel==12) then return 0x00;
    //     elsif(sel==11) then return in[7][7] * 0xff;
    //     elsif(sel==10) then return in[5][7] * 0xff;
    //     elsif(sel==9) then return in[3][7] * 0xff;
    //     elsif(sel==8) then return in[1][7] * 0xff;
    //     else return in[sel];
    // }
    void
    Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 selector = (VecElemU64)src0[lane];
                selector = (selector << 32) | (VecElemU64)src1[lane];
                vdst[lane] = 0;

                DPRINTF(GCN3, "Executing v_perm_b32 src_0 0x%08x, src_1 "
                        "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
                        src1[lane], src2[lane], vdst[lane]);
                DPRINTF(GCN3, "Selector: 0x%08x \n", selector);

                for (int i = 0; i < 4; ++i) {
                    VecElemU32 permuted_val = permute(selector, 0xFF
                        & ((VecElemU32)src2[lane] >> (8 * i)));
                    // place the selected byte at byte position i of the
                    // result; the original shifted by i bits, which would
                    // misplace bytes 1..3
                    vdst[lane] |= (permuted_val << (8 * i));
                }

                DPRINTF(GCN3, "v_perm result: 0x%08x\n", vdst[lane]);
            }
        }

        vdst.write();
    } // execute
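    // Illustrative note (added commentary): each selector byte of S2 picks
    // one byte of the 64-bit concatenation {S0,S1}. With src0 = 0xaabbccdd
    // and src1 = 0x11223344, selector byte 0x06 yields byte 6 of
    // 0xaabbccdd11223344, i.e. 0xbb; selector 0x0c yields 0x00 and any
    // selector >= 0x0d yields 0xff.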
    Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F16

    Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
    {
    } // ~Inst_VOP3__V_FMA_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Fused half precision multiply add.
    void
    Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_DIV_FIXUP_F16

    Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F16

    // sign_out = sign(S1.f16)^sign(S2.f16);
    // if (S2.f16 == NAN)
    //     D.f16 = Quiet(S2.f16);
    // else if (S1.f16 == NAN)
    //     D.f16 = Quiet(S1.f16);
    // else if (S1.f16 == S2.f16 == 0)
    //     D.f16 = pele_nan(0xfe00);
    // else if (abs(S1.f16) == abs(S2.f16) == +-INF)
    //     D.f16 = pele_nan(0xfe00);
    // else if (S1.f16 == 0 || abs(S2.f16) == +-INF)
    //     D.f16 = sign_out ? -INF : INF;
    // else if (abs(S1.f16) == +-INF || S2.f16 == 0)
    //     D.f16 = sign_out ? -0 : 0;
    // else if ((exp(S2.f16) - exp(S1.f16)) < -150)
    //     D.f16 = sign_out ? -underflow : underflow;
    // else if (exp(S1.f16) == 255)
    //     D.f16 = sign_out ? -overflow : overflow;
    // else
    //     D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
    // Half precision division fixup.
    // S0 = Quotient, S1 = Denominator, S2 = Numerator.
    // Given a numerator, denominator, and quotient from a divide, this opcode
    // will detect and apply special case numerics, touching up the quotient
    // if necessary. This opcode also generates invalid, denorm and divide by
    // zero exceptions caused by the division.
    void
    Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pkaccum_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKACCUM_U8_F32

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32

    // byte = S1.u[1:0]; bit = byte * 8;
    // D.u[bit + 7:bit] = flt32_to_uint8(S0.f);
    // Pack converted value of S0.f into byte S1 of the destination.
    // SQ translates to V_CVT_PK_U8_F32.
    // Note: this opcode uses src_c to pass destination in as a source.
    void
    Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P1_F32

    Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P1_F32

    // D.f = P10 * S.f + P0;
    void
    Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p2_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P2_F32

    Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F32

    // D.f = P20 * S.f + D.f;
    void
    Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_mov_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_MOV_F32

    Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
    {
    } // ~Inst_VOP3__V_INTERP_MOV_F32

    // D.f = {P10,P20,P0}[S.u]; parameter load.
    void
    Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1ll_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LL_F16

    Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LL_F16

    // D.f32 = P10.f16 * S0.f32 + P0.f16.
    void
    Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1lv_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LV_F16

    Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LV_F16

    void
    Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p2_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P2_F16

    Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F16

    // D.f16 = P20.f16 * S0.f32 + S2.f32.
    void
    Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_ADD_F64

    Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
    {
    } // ~Inst_VOP3__V_ADD_F64

    // D.d = S0.d + S1.d.
    void
    Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane]) ) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src0[lane]) &&
                           std::isinf(src1[lane])) {
                    if (std::signbit(src0[lane]) !=
                        std::signbit(src1[lane])) {
                        vdst[lane] = NAN;
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else if (std::isinf(src0[lane])) {
                    vdst[lane] = src0[lane];
                } else if (std::isinf(src1[lane])) {
                    vdst[lane] = src1[lane];
                } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src1[lane];
                    }
                } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src1[lane]) == FP_ZERO) {
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src0[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MUL_F64

    Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
    {
    } // ~Inst_VOP3__V_MUL_F64

    // D.d = S0.d * S1.d.
    void
    Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MIN_F64

    Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
    {
    } // ~Inst_VOP3__V_MIN_F64

    // D.d = min(S0.d, S1.d).
    void
    Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MAX_F64

    Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
    {
    } // ~Inst_VOP3__V_MAX_F64

    // D.d = max(S0.d, S1.d).
    void
    Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_LDEXP_F64

    Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
    {
    } // ~Inst_VOP3__V_LDEXP_F64

    // D.d = S0.d * 2**S1.i[31:0] (ldexp).
    void
    Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) || std::isinf(src0[lane])) {
                    vdst[lane] = src0[lane];
                } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                           || std::fpclassify(src0[lane]) == FP_ZERO) {
                    if (std::signbit(src0[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = +0.0;
                    }
                } else {
                    vdst[lane] = std::ldexp(src0[lane], src1[lane]);
                }
            }
        }

        vdst.write();
    } // execute
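    // Illustrative note (added commentary): ldexp scales by a power of two
    // rather than computing a general power, so std::ldexp(1.5, 3) =
    // 1.5 * 2**3 = 12.0; some ISA texts word this operation as pow().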
    Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_lo_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_LO_U32

    Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32()
    {
    } // ~Inst_VOP3__V_MUL_LO_U32

    // D.u = S0.u * S1.u.
    void
    Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_U32

    Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
    {
    } // ~Inst_VOP3__V_MUL_HI_U32

    // D.u = (S0.u * S1.u) >> 32.
    void
    Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute
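    // Illustrative note (added commentary): the operands are widened to 64
    // bits so the upper half of the product survives the shift. E.g.,
    // 0x80000000 * 0x80000000 = 0x4000000000000000, so vdst = 0x40000000.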
    Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_I32

    Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
    {
    } // ~Inst_VOP3__V_MUL_HI_I32

    // D.i = (S0.i * S1.i) >> 32.
    void
    Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LDEXP_F32

    Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
    {
    } // ~Inst_VOP3__V_LDEXP_F32

    // D.f = S0.f * 2**S1.i (ldexp).
    void
    Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ldexp(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_readlane_b32", true)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_READLANE_B32

    Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
    {
    } // ~Inst_VOP3__V_READLANE_B32

    // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
    // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        sdst = src0[src1.rawData() & 0x3f];

        sdst.write();
    } // execute

    Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_writelane_b32", false)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_WRITELANE_B32

    Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
    {
    } // ~Inst_VOP3__V_WRITELANE_B32

    // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
    // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
    // exec mask. Input and output modifiers not supported; this is an untyped
    // operation.
    void
    Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.read();
        src1.read();
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        vdst[src1.rawData() & 0x3f] = src0.rawData();

        vdst.write();
    } // execute
    Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bcnt_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BCNT_U32_B32

    Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
    {
    } // ~Inst_VOP3__V_BCNT_U32_B32

    // D.u = CountOneBits(S0.u) + S1.u. Bit count.
    void
    Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = popCount(src0[lane]) + src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mbcnt_lo_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_LO_U32_B32

    Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_LO_U32_B32

    // Masked bit count, ThreadPosition is the position of this thread in the
    // wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                threadMask = ((1LL << lane) - 1LL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
                    src1[lane];
            }
        }

        vdst.write();
    } // execute
    // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---

    Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mbcnt_hi_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_HI_U32_B32

    Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_HI_U32_B32

    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                threadMask = ((1LL << lane) - 1LL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
                    src1[lane];
            }
        }

        vdst.write();
    } // execute
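    // Illustrative note (added commentary): mbcnt_lo counts S0 bits under
    // ThreadMask[31:0] and mbcnt_hi continues with ThreadMask[63:32], so
    // chaining the two with S0 = EXEC gives each lane its rank among the
    // active lanes. E.g., for lane 33, ThreadMask = (1 << 33) - 1: the low
    // half contributes 32 set bits and the high half contributes bit 32.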
    // --- Inst_VOP3__V_LSHLREV_B64 class methods ---

    Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B64

    Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64()
    {
    } // ~Inst_VOP3__V_LSHLREV_B64

    // D.u64 = S1.u64 << S0.u[5:0].
    void
    Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshrrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B64

    Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
    {
    } // ~Inst_VOP3__V_LSHRREV_B64

    // D.u64 = S1.u64 >> S0.u[5:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I64

    Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
    {
    } // ~Inst_VOP3__V_ASHRREV_I64

    // D.u64 = signext(S1.u64) >> S0.u[5:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trig_preop_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRIG_PREOP_F64

    Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
    {
    } // ~Inst_VOP3__V_TRIG_PREOP_F64

    void
    Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfm_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFM_B32

    Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
    {
    } // ~Inst_VOP3__V_BFM_B32

    // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0];
    void
    Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
                    << bits(src1[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
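    // Illustrative note (added commentary): the result is a field of
    // S0[4:0] ones shifted up by S1[4:0] bits. E.g., src0 = 5 and src1 = 8
    // give ((1 << 5) - 1) << 8 = 0x1f00.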
    Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pknorm_i16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_I16_F32

    Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32

    // D = {(snorm)S1.f, (snorm)S0.f}.
    void
    Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pknorm_u16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_U16_F32

    Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32

    // D = {(unorm)S1.f, (unorm)S0.f}.
    void
    Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pkrtz_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKRTZ_F16_F32

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32

    void
    Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_u16_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_U16_U32

    Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U16_U32

    // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
    void
    Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_i16_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_I16_I32

    Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
    {
    } // ~Inst_VOP3__V_CVT_PK_I16_I32

    // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
    void
    Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_u32")
    {
    } // Inst_DS__DS_ADD_U32

    Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
    {
    } // ~Inst_DS__DS_ADD_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_u32")
    {
    } // Inst_DS__DS_SUB_U32

    Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
    {
    } // ~Inst_DS__DS_SUB_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_u32")
    {
    } // Inst_DS__DS_RSUB_U32

    Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
    {
    } // ~Inst_DS__DS_RSUB_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_u32")
    {
    } // Inst_DS__DS_INC_U32

    Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
    {
    } // ~Inst_DS__DS_INC_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_u32")
    {
    } // Inst_DS__DS_DEC_U32

    Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
    {
    } // ~Inst_DS__DS_DEC_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_i32")
    {
    } // Inst_DS__DS_MIN_I32

    Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
    {
    } // ~Inst_DS__DS_MIN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_i32")
    {
    } // Inst_DS__DS_MAX_I32

    Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
    {
    } // ~Inst_DS__DS_MAX_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_u32")
    {
    } // Inst_DS__DS_MIN_U32

    Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
    {
    } // ~Inst_DS__DS_MIN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_u32")
    {
    } // Inst_DS__DS_MAX_U32

    Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
    {
    } // ~Inst_DS__DS_MAX_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_b32")
    {
    } // Inst_DS__DS_AND_B32

    Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
    {
    } // ~Inst_DS__DS_AND_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_b32")
    {
    } // Inst_DS__DS_OR_B32

    Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
    {
    } // ~Inst_DS__DS_OR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_b32")
    {
    } // Inst_DS__DS_XOR_B32

    Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
    {
    } // ~Inst_DS__DS_XOR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_b32")
    {
    } // Inst_DS__DS_MSKOR_B32

    Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
    {
    } // ~Inst_DS__DS_MSKOR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM_ADDR[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B32

    Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
    {
    } // ~Inst_DS__DS_WRITE_B32

    // MEM[ADDR] = DATA.
    void
    Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
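    // Illustrative note (added commentary): for single-element DS writes the
    // two 8-bit offset fields concatenate into one 16-bit byte offset, so
    // OFFSET1 = 0x01 and OFFSET0 = 0x20 address base + 0x120.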
    Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2_B32

    Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
    {
    } // ~Inst_DS__DS_WRITE2_B32

    // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
    void
    Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4;
        Addr offset1 = instData.OFFSET1 * 4;

        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
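    // Illustrative note (added commentary): unlike ds_write_b32, the write2
    // offsets are independent element indices scaled by the 4-byte element
    // size, so OFFSET0 = 1 and OFFSET1 = 2 write to base + 4 and base + 8.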
    Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2st64_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2ST64_B32

    Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
    {
    } // ~Inst_DS__DS_WRITE2ST64_B32

    // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
    void
    Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
                gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4 * 64;
        Addr offset1 = instData.OFFSET1 * 4 * 64;

        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_DS__DS_CMPST_B32 class methods ---

    Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_b32")
    {
    } // Inst_DS__DS_CMPST_B32

    Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
    {
    } // ~Inst_DS__DS_CMPST_B32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_F32

    Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32()
    {
    } // ~Inst_DS__DS_CMPST_F32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_F32

    Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32()
    {
    } // ~Inst_DS__DS_MIN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_F32

    Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32()
    {
    } // ~Inst_DS__DS_MAX_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_nop")
    {
        setFlag(Nop);
    } // Inst_DS__DS_NOP

    Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
    {
    } // ~Inst_DS__DS_NOP

    // Do nothing.
    void
    Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute

    Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_F32

    Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
    {
    } // ~Inst_DS__DS_ADD_F32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b8")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B8

    Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
    {
    } // ~Inst_DS__DS_WRITE_B8

    // MEM[ADDR] = DATA[7:0].
    void
    Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU8 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }
    void
    Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU8>(gpuDynInst, offset);
    }

    void
    Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }
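    // The two 8-bit offset fields above form a single 16-bit byte offset,
    // offset = (OFFSET1 << 8) | OFFSET0. For example, OFFSET1 = 0x01 and
    // OFFSET0 = 0x20 yield a byte offset of 0x0120 (288 bytes) applied to
    // each lane's computed address.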
    // --- Inst_DS__DS_WRITE_B16 class methods ---

    Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b16")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B16

    Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16()
    {
    } // ~Inst_DS__DS_WRITE_B16

    // MEM[ADDR] = DATA[15:0].
    void
    Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU16 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }
    void
    Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU16>(gpuDynInst, offset);
    }

    void
    Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }
    // --- Inst_DS__DS_ADD_RTN_U32 class methods ---

    Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_u32")
    {
    } // Inst_DS__DS_ADD_RTN_U32

    Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
    {
    } // ~Inst_DS__DS_ADD_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_rtn_u32")
    {
    } // Inst_DS__DS_SUB_RTN_U32

    Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32()
    {
    } // ~Inst_DS__DS_SUB_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u32")
    {
    } // Inst_DS__DS_RSUB_RTN_U32

    Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u32")
    {
    } // Inst_DS__DS_INC_RTN_U32

    Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
    {
    } // ~Inst_DS__DS_INC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u32")
    {
    } // Inst_DS__DS_DEC_RTN_U32

    Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
    {
    } // ~Inst_DS__DS_DEC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i32")
    {
    } // Inst_DS__DS_MIN_RTN_I32

    Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
    {
    } // ~Inst_DS__DS_MIN_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i32")
    {
    } // Inst_DS__DS_MAX_RTN_I32

    Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
    {
    } // ~Inst_DS__DS_MAX_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u32")
    {
    } // Inst_DS__DS_MIN_RTN_U32

    Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
    {
    } // ~Inst_DS__DS_MIN_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u32")
    {
    } // Inst_DS__DS_MAX_RTN_U32

    Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
    {
    } // ~Inst_DS__DS_MAX_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b32")
    {
    } // Inst_DS__DS_AND_RTN_B32

    Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
    {
    } // ~Inst_DS__DS_AND_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b32")
    {
    } // Inst_DS__DS_OR_RTN_B32

    Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
    {
    } // ~Inst_DS__DS_OR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b32")
    {
    } // Inst_DS__DS_XOR_RTN_B32

    Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
    {
    } // ~Inst_DS__DS_XOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b32")
    {
    } // Inst_DS__DS_MSKOR_RTN_B32

    Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B32

    Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B32

    Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B32

    // Write-exchange 2 separate dwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B32

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32

    // Write-exchange 2 separate dwords with a stride of 64 dwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
    {
    } // Inst_DS__DS_CMPST_RTN_B32

    Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_RTN_F32

    Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_RTN_F32

    Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
    {
    } // ~Inst_DS__DS_MIN_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_RTN_F32

    Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
    {
    } // ~Inst_DS__DS_MAX_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrap_rtn_b32")
    {
    } // Inst_DS__DS_WRAP_RTN_B32

    Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
    {
    } // ~Inst_DS__DS_WRAP_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_RTN_F32

    Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
    {
    } // ~Inst_DS__DS_ADD_RTN_F32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B32

    Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
    {
    } // ~Inst_DS__DS_READ_B32

    // RETURN_DATA = MEM[ADDR].
    // Dword read.
    void
    Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU32>(gpuDynInst, offset);
    }

    void
    Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }
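    // Note on the three-phase flow above, shared by the implemented DS
    // loads and stores in this file: execute() computes the per-lane LDS
    // addresses and hands the instruction to the local memory pipeline,
    // initiateAcc() performs the LDS access through the d_data staging
    // buffer, and completeAcc() copies the staged values into the
    // destination VGPR(s).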
    Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2_B32

    Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32()
    {
    } // ~Inst_DS__DS_READ2_B32

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
    // Read 2 dwords.
    void
    Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4;
        Addr offset1 = instData.OFFSET1 * 4;

        initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
    }

    void
    Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    }
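    // For example, OFFSET0 = 0 and OFFSET1 = 1 make the two reads above
    // target ADDR_BASE and ADDR_BASE + 4 bytes, with the results placed in
    // the consecutive VGPRs VDST and VDST + 1.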
    Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2st64_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2ST64_B32

    Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32()
    {
    } // ~Inst_DS__DS_READ2ST64_B32

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
    // Read 2 dwords.
    void
    Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = (instData.OFFSET0 * 4 * 64);
        Addr offset1 = (instData.OFFSET1 * 4 * 64);

        initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
    }

    void
    Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);

        // The staging buffer was filled by initDualMemRead<VecElemU32>, so
        // it must be indexed as dwords here as well.
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    }
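    // Illustrative numbers for the ST64 variant: the offset fields are
    // scaled by 4 bytes * 64, so OFFSET0 = 0 and OFFSET1 = 2 read from
    // ADDR_BASE and ADDR_BASE + 512 bytes, i.e., two dwords spaced
    // 2 * 64 dwords apart.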
    // --- Inst_DS__DS_READ_I8 class methods ---

    Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_i8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_I8

    Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8()
    {
    } // ~Inst_DS__DS_READ_I8

    // RETURN_DATA = signext(MEM[ADDR][7:0]).
    // Signed byte read.
    void
    Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_u8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_U8

    Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8()
    {
    } // ~Inst_DS__DS_READ_U8

    // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
    // Unsigned byte read.
    void
    Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU8>(gpuDynInst, offset);
    }

    void
    Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }
    // --- Inst_DS__DS_READ_I16 class methods ---

    Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_i16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_I16

    Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16()
    {
    } // ~Inst_DS__DS_READ_I16

    // RETURN_DATA = signext(MEM[ADDR][15:0]).
    // Signed short read.
    void
    Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_u16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_U16

    Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16()
    {
    } // ~Inst_DS__DS_READ_U16

    // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
    // Unsigned short read.
    void
    Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU16>(gpuDynInst, offset);
    }

    void
    Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }
    // --- Inst_DS__DS_SWIZZLE_B32 class methods ---

    Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_swizzle_b32")
    {
        setFlag(Load);
    } // Inst_DS__DS_SWIZZLE_B32

    Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
    {
    } // ~Inst_DS__DS_SWIZZLE_B32

    // RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
    // Dword swizzle, no data is written to LDS memory.
    void
    Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();

        if (gpuDynInst->exec_mask.none()) {
            wf->decLGKMInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));

        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        /**
         * The "DS pattern" is comprised of both offset fields. That is,
         * the swizzle pattern between lanes. Bit 15 of the DS pattern
         * dictates which swizzle mode to use. There are two different
         * swizzle patterns: 1) QDMode and 2) bit-mask mode. If bit 15 is
         * set use QDMode, else use bit-mask mode. The remaining bits
         * dictate how to swizzle the lanes.
         *
         * QDMode:   Chunks the lanes into groups of four and swizzles
         *           among them. Bits 7:6 dictate where lane 3 (of the
         *           current chunk) gets its data, bits 5:4 lane 2, etc.
         *
         * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks.
         *           14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
         *           is the and_mask. Each lane is swizzled by performing
         *           the appropriate operation using these masks.
         */
        VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);

        data.read();

        if (bits(ds_pattern, 15)) {
            // QDMode
            for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
                /**
                 * This operation allows data sharing between groups
                 * of four consecutive threads. Note the increment by
                 * 4 in the for loop.
                 */
                if (gpuDynInst->exec_mask[lane]) {
                    int index0 = lane + bits(ds_pattern, 1, 0);
                    panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index0);
                    vdst[lane]
                        = gpuDynInst->exec_mask[index0] ? data[index0] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 1]) {
                    int index1 = lane + bits(ds_pattern, 3, 2);
                    panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index1);
                    vdst[lane + 1]
                        = gpuDynInst->exec_mask[index1] ? data[index1] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 2]) {
                    int index2 = lane + bits(ds_pattern, 5, 4);
                    panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index2);
                    vdst[lane + 2]
                        = gpuDynInst->exec_mask[index2] ? data[index2] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 3]) {
                    int index3 = lane + bits(ds_pattern, 7, 6);
                    panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index3);
                    vdst[lane + 3]
                        = gpuDynInst->exec_mask[index3] ? data[index3] : 0;
                }
            }
        } else {
            // Bit-mask mode
            int and_mask = bits(ds_pattern, 4, 0);
            int or_mask = bits(ds_pattern, 9, 5);
            int xor_mask = bits(ds_pattern, 14, 10);
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    int index = (((lane & and_mask) | or_mask) ^ xor_mask);
                    // Adjust for the next 32 lanes.
                    if (lane > 31) {
                        index += 32;
                    }
                    panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index);
                    vdst[lane]
                        = gpuDynInst->exec_mask[index] ? data[index] : 0;
                }
            }
        }

        vdst.write();
    }
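    // Two illustrative ds_pattern values for the swizzle above (values
    // chosen here as examples): 0x801B has bit 15 set and quad selectors
    // 0b00011011, so each group of four lanes is reversed (vdst[lane]
    // takes data[lane + 3], vdst[lane + 1] takes data[lane + 2], and so
    // on). 0x041F has bit 15 clear with and_mask = 0x1F, or_mask = 0, and
    // xor_mask = 1, giving index = lane ^ 1, i.e., each even lane swaps
    // data with its odd neighbor.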
    // --- Inst_DS__DS_PERMUTE_B32 class methods ---

    Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_permute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_PERMUTE_B32

    Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
    {
    } // ~Inst_DS__DS_PERMUTE_B32

    // Forward permute.
    void
    Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));

        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        addr.read();
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                /**
                 * One of the offset fields can be used for the index.
                 * It is assumed OFFSET0 would be used, as OFFSET1 is
                 * typically only used for DS ops that operate on two
                 * disparate pieces of data.
                 */
                assert(!instData.OFFSET1);
                /**
                 * The address provided is a byte address, but VGPRs are
                 * 4 bytes, so we must divide by 4 to get the actual VGPR
                 * index. Additionally, the index is calculated modulo the
                 * WF size, 64 in this case, so we simply extract bits 7-2.
                 */
                int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
                panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
                         "out of bounds.\n", gpuDynInst->disassemble(),
                         index);
                /**
                 * If the shuffled index corresponds to a lane that is
                 * inactive then this instruction writes a 0 to the active
                 * lane in VDST.
                 */
                if (wf->execMask(index)) {
                    vdst[index] = data[lane];
                } else {
                    vdst[index] = 0;
                }
            }
        }

        vdst.write();

        wf->decLGKMInstsIssued();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();
    }
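    // Illustrative forward-permute behavior: every active lane pushes its
    // value to vdst[index]. For an assumed input where each lane supplies
    // addr[lane] = (lane + 1) * 4 and OFFSET0 = 0, index works out to
    // (lane + 1) mod 64 (bits 7:2 wrap), so the wavefront's data rotates
    // up by one lane, with lane 63's value landing in vdst[0].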
    // --- Inst_DS__DS_BPERMUTE_B32 class methods ---

    Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_bpermute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_BPERMUTE_B32

    Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
    {
    } // ~Inst_DS__DS_BPERMUTE_B32

    // Backward permute.
    void
    Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));

        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        addr.read();
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                /**
                 * One of the offset fields can be used for the index.
                 * It is assumed OFFSET0 would be used, as OFFSET1 is
                 * typically only used for DS ops that operate on two
                 * disparate pieces of data.
                 */
                assert(!instData.OFFSET1);
                /**
                 * The address provided is a byte address, but VGPRs are
                 * 4 bytes, so we must divide by 4 to get the actual VGPR
                 * index. Additionally, the index is calculated modulo the
                 * WF size, 64 in this case, so we simply extract bits 7-2.
                 */
                int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
                panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
                         "out of bounds.\n", gpuDynInst->disassemble(),
                         index);
                /**
                 * If the shuffled index corresponds to a lane that is
                 * inactive then this instruction writes a 0 to the active
                 * lane in VDST.
                 */
                if (wf->execMask(index)) {
                    vdst[lane] = data[index];
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();

        wf->decLGKMInstsIssued();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();
    }
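    // In contrast to ds_permute_b32, the backward permute above pulls:
    // each active lane reads data[index]. For an assumed input where all
    // lanes supply addr[lane] = 0, every lane computes index = 0 and lane
    // 0's value is broadcast to the whole wavefront.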
    // --- Inst_DS__DS_ADD_U64 class methods ---

    Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_u64")
    {
    } // Inst_DS__DS_ADD_U64

    Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
    {
    } // ~Inst_DS__DS_ADD_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_u64")
    {
    } // Inst_DS__DS_SUB_U64

    Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64()
    {
    } // ~Inst_DS__DS_SUB_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_u64")
    {
    } // Inst_DS__DS_RSUB_U64

    Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64()
    {
    } // ~Inst_DS__DS_RSUB_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_u64")
    {
    } // Inst_DS__DS_INC_U64

    Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64()
    {
    } // ~Inst_DS__DS_INC_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_u64")
    {
    } // Inst_DS__DS_DEC_U64

    Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64()
    {
    } // ~Inst_DS__DS_DEC_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_i64")
    {
    } // Inst_DS__DS_MIN_I64

    Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64()
    {
    } // ~Inst_DS__DS_MIN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_i64")
    {
    } // Inst_DS__DS_MAX_I64

    Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64()
    {
    } // ~Inst_DS__DS_MAX_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_u64")
    {
    } // Inst_DS__DS_MIN_U64

    Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64()
    {
    } // ~Inst_DS__DS_MIN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_u64")
    {
    } // Inst_DS__DS_MAX_U64

    Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64()
    {
    } // ~Inst_DS__DS_MAX_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_b64")
    {
    } // Inst_DS__DS_AND_B64

    Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64()
    {
    } // ~Inst_DS__DS_AND_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_b64")
    {
    } // Inst_DS__DS_OR_B64

    Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64()
    {
    } // ~Inst_DS__DS_OR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_b64")
    {
    } // Inst_DS__DS_XOR_B64

    Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64()
    {
    } // ~Inst_DS__DS_XOR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_b64")
    {
    } // Inst_DS__DS_MSKOR_B64

    Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64()
    {
    } // ~Inst_DS__DS_MSKOR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B64

    Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64()
    {
    } // ~Inst_DS__DS_WRITE_B64

    // MEM[ADDR] = DATA.
    // Write qword.
    void
    Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }
    void
    Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU64>(gpuDynInst, offset);
    }

    void
    Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }
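    // A note on operand widths in the methods above: ConstVecOperandU64
    // names a 64-bit vector operand, so the source of ds_write_b64 is read
    // from the VGPR pair starting at DATA0 (two consecutive 32-bit
    // registers, as implied by the 64-bit element type), and
    // initMemWrite<VecElemU64> stores the full qword per lane.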
    Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2_B64

    Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64()
    {
    } // ~Inst_DS__DS_WRITE2_B64

    // MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
    // Write 2 qwords.
    void
    Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2] = data0[lane];
                (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }
    void
    Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 8;
        Addr offset1 = instData.OFFSET1 * 8;

        initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
    }

    void
    Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }
    Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2st64_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2ST64_B64

    Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64()
    {
    } // ~Inst_DS__DS_WRITE2ST64_B64

    // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2.
    // Write 2 qwords.
    void
    Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_b64")
    {
    } // Inst_DS__DS_CMPST_B64

    Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64()
    {
    } // ~Inst_DS__DS_CMPST_B64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_CMPST_F64

    Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64()
    {
    } // ~Inst_DS__DS_CMPST_F64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_F64

    Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64()
    {
    } // ~Inst_DS__DS_MIN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_F64

    Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64()
    {
    } // ~Inst_DS__DS_MAX_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_u64")
    {
    } // Inst_DS__DS_ADD_RTN_U64

    Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64()
    {
    } // ~Inst_DS__DS_ADD_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_rtn_u64")
    {
    } // Inst_DS__DS_SUB_RTN_U64

    Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64()
    {
    } // ~Inst_DS__DS_SUB_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u64")
    {
    } // Inst_DS__DS_RSUB_RTN_U64

    Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u64")
    {
    } // Inst_DS__DS_INC_RTN_U64

    Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64()
    {
    } // ~Inst_DS__DS_INC_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u64")
    {
    } // Inst_DS__DS_DEC_RTN_U64

    Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64()
    {
    } // ~Inst_DS__DS_DEC_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i64")
    {
    } // Inst_DS__DS_MIN_RTN_I64

    Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64()
    {
    } // ~Inst_DS__DS_MIN_RTN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i64")
    {
    } // Inst_DS__DS_MAX_RTN_I64

    Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64()
    {
    } // ~Inst_DS__DS_MAX_RTN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u64")
    {
    } // Inst_DS__DS_MIN_RTN_U64

    Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64()
    {
    } // ~Inst_DS__DS_MIN_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u64")
    {
    } // Inst_DS__DS_MAX_RTN_U64

    Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64()
    {
    } // ~Inst_DS__DS_MAX_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b64")
    {
    } // Inst_DS__DS_AND_RTN_B64

    Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64()
    {
    } // ~Inst_DS__DS_AND_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b64")
    {
    } // Inst_DS__DS_OR_RTN_B64

    Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64()
    {
    } // ~Inst_DS__DS_OR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b64")
    {
    } // Inst_DS__DS_XOR_RTN_B64

    Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64()
    {
    } // ~Inst_DS__DS_XOR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b64")
    {
    } // Inst_DS__DS_MSKOR_RTN_B64

    Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    // Masked dword OR, D0 contains the mask and D1 contains the new value.
    void
    Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B64

    Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B64

    Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B64

    // Write-exchange 2 separate qwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B64

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64

    // Write-exchange 2 qwords with a stride of 64 qwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
    {
    } // Inst_DS__DS_CMPST_RTN_B64

    Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_CMPST_RTN_F64

    Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_RTN_F64

    Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64()
    {
    } // ~Inst_DS__DS_MIN_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_RTN_F64

    Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64()
    {
    } // ~Inst_DS__DS_MAX_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B64

    Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64()
    {
    } // ~Inst_DS__DS_READ_B64

    // RETURN_DATA = MEM[ADDR].
    // Qword read.
    void
    Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU64>(gpuDynInst, offset);
    }

    void
    Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }
    Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2_B64

    Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64()
    {
    } // ~Inst_DS__DS_READ2_B64

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
    // Read 2 qwords.
    void
    Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 8;
        Addr offset1 = instData.OFFSET1 * 8;

        initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
    }

    void
    Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst0(gpuDynInst, extData.VDST);
        VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    }
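    // VDST + 2 above is deliberate: each 64-bit result occupies two
    // consecutive 32-bit VGPRs, so the first qword fills VDST:VDST+1 and
    // the second starts at VDST + 2 (compare ds_read2_b32, which uses
    // VDST + 1).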
    Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2st64_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2ST64_B64

    Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64()
    {
    } // ~Inst_DS__DS_READ2ST64_B64

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
    // Read 2 qwords.
    void
    Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = (instData.OFFSET0 * 8 * 64);
        Addr offset1 = (instData.OFFSET1 * 8 * 64);

        initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
    }

    void
    Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst0(gpuDynInst, extData.VDST);
        VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    }
    Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
    {
    } // Inst_DS__DS_CONDXCHG32_RTN_B64

    Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64()
    {
    } // ~Inst_DS__DS_CONDXCHG32_RTN_B64

    // Conditional write exchange.
    void
    Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u32")
    {
    } // Inst_DS__DS_ADD_SRC2_U32

    Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u32")
    {
    } // Inst_DS__DS_SUB_SRC2_U32

    Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
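    // Worked example of the SRC2 address formula used by this family of
    // instructions: with offset1[7] = 0, offset1 = 0x00, and
    // offset0 = 0x10, the second operand address is
    // B = A + 4 * 0x10 = A + 64 bytes; when offset1[7] = 1 the
    // displacement is instead taken from the upper bits of A itself.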
    Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u32")
    {
    } // Inst_DS__DS_RSUB_SRC2_U32

    Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u32")
    {
    } // Inst_DS__DS_INC_SRC2_U32

    Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32()
    {
    } // ~Inst_DS__DS_INC_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u32")
    {
    } // Inst_DS__DS_DEC_SRC2_U32

    Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    void
    Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i32")
    {
    } // Inst_DS__DS_MIN_SRC2_I32

    Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i32")
    {
    } // Inst_DS__DS_MAX_SRC2_I32

    Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u32")
    {
    } // Inst_DS__DS_MIN_SRC2_U32

    Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u32")
    {
    } // Inst_DS__DS_MAX_SRC2_U32

    Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b32")
    {
    } // Inst_DS__DS_AND_SRC2_B32

    Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
    {
    } // ~Inst_DS__DS_AND_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //            {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS
*iFmt
)
33879 : Inst_DS(iFmt
, "ds_or_src2_b32")
33881 } // Inst_DS__DS_OR_SRC2_B32
33883 Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
33885 } // ~Inst_DS__DS_OR_SRC2_B32
33888 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33889 // {offset1[6],offset1[6:0],offset0});
33890 // MEM[A] = MEM[A] | MEM[B].
33892 Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst
)
33894 panicUnimplemented();
33897 Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS
*iFmt
)
33898 : Inst_DS(iFmt
, "ds_xor_src2_b32")
33900 } // Inst_DS__DS_XOR_SRC2_B32
33902 Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
33904 } // ~Inst_DS__DS_XOR_SRC2_B32
33907 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33908 // {offset1[6],offset1[6:0],offset0});
33909 // MEM[A] = MEM[A] ^ MEM[B].
33911 Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst
)
33913 panicUnimplemented();
33916 Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS
*iFmt
)
33917 : Inst_DS(iFmt
, "ds_write_src2_b32")
33919 setFlag(MemoryRef
);
33921 } // Inst_DS__DS_WRITE_SRC2_B32
33923 Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
33925 } // ~Inst_DS__DS_WRITE_SRC2_B32
33928 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33929 // {offset1[6],offset1[6:0],offset0});
33930 // MEM[A] = MEM[B].
33933 Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst
)
33935 panicUnimplemented();
    Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_SRC2_F32

    Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_SRC2_F32

    Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_SRC2_F32

    Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] + MEM[A].
    void
    Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_release_all")
    {
    } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
    {
    } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    void
    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_init")
    {
    } // Inst_DS__DS_GWS_INIT

    Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
    {
    } // ~Inst_DS__DS_GWS_INIT

    void
    Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_v")
    {
    } // Inst_DS__DS_GWS_SEMA_V

    Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
    {
    } // ~Inst_DS__DS_GWS_SEMA_V

    void
    Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_br")
    {
    } // Inst_DS__DS_GWS_SEMA_BR

    Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
    {
    } // ~Inst_DS__DS_GWS_SEMA_BR

    void
    Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_p")
    {
    } // Inst_DS__DS_GWS_SEMA_P

    Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
    {
    } // ~Inst_DS__DS_GWS_SEMA_P

    void
    Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_barrier")
    {
    } // Inst_DS__DS_GWS_BARRIER

    Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
    {
    } // ~Inst_DS__DS_GWS_BARRIER

    void
    Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_consume")
    {
    } // Inst_DS__DS_CONSUME

    Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
    {
    } // ~Inst_DS__DS_CONSUME

    void
    Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_append")
    {
    } // Inst_DS__DS_APPEND

    Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
    {
    } // ~Inst_DS__DS_APPEND

    void
    Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_ordered_count")
    {
    } // Inst_DS__DS_ORDERED_COUNT

    Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
    {
    } // ~Inst_DS__DS_ORDERED_COUNT

    void
    Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u64")
    {
    } // Inst_DS__DS_ADD_SRC2_U64

    Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u64")
    {
    } // Inst_DS__DS_SUB_SRC2_U64

    Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u64")
    {
    } // Inst_DS__DS_RSUB_SRC2_U64

    Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u64")
    {
    } // Inst_DS__DS_INC_SRC2_U64

    Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
    {
    } // ~Inst_DS__DS_INC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u64")
    {
    } // Inst_DS__DS_DEC_SRC2_U64

    Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    void
    Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i64")
    {
    } // Inst_DS__DS_MIN_SRC2_I64

    Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i64")
    {
    } // Inst_DS__DS_MAX_SRC2_I64

    Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u64")
    {
    } // Inst_DS__DS_MIN_SRC2_U64

    Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u64")
    {
    } // Inst_DS__DS_MAX_SRC2_U64

    Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b64")
    {
    } // Inst_DS__DS_AND_SRC2_B64

    Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64()
    {
    } // ~Inst_DS__DS_AND_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b64")
    {
    } // Inst_DS__DS_OR_SRC2_B64

    Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64()
    {
    } // ~Inst_DS__DS_OR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b64")
    {
    } // Inst_DS__DS_XOR_SRC2_B64

    Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B64

    Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    void
    Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_SRC2_F64

    Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_SRC2_F64

    Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    //     {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b96")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B96

    Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96()
    {
    } // ~Inst_DS__DS_WRITE_B96

    // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
    // Tri-dword write.
    void
    Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b128")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B128

    Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128()
    {
    } // ~Inst_DS__DS_WRITE_B128

    // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
    // Quad-dword write.
    void
    Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

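    // Hedged note on the wide DS accesses above and below: a _b96/_b128
    // access moves three or four consecutive dwords per lane, lowest dword
    // at the lowest address, e.g. for _b96:
    //
    //   MEM[ADDR]     = DATA[31:0];
    //   MEM[ADDR + 4] = DATA[63:32];
    //   MEM[ADDR + 8] = DATA[95:64];
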
    Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b96")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B96

    Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96()
    {
    } // ~Inst_DS__DS_READ_B96

    // Tri-dword read.
    void
    Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b128")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B128

    Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128()
    {
    } // ~Inst_DS__DS_READ_B128

    // Quad-dword read.
    void
    Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X

    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

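    // Hedged background for the implemented MUBUF paths below: SRSRC names
    // a group of four consecutive SGPRs (hence the * 4 when the 128-bit
    // rsrcDesc operand is constructed) holding the buffer resource
    // descriptor that calcAddr() combines with the VGPR index/offset
    // operands and the immediate offset.
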
    Inst_MUBUF__BUFFER_LOAD_UBYTE
        ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_UBYTE

    Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE

    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

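    // Hedged summary of the branching in execute() above: IDXEN and OFFEN
    // control which VGPRs contribute to the address.
    //
    //   !IDXEN && !OFFEN : no VGPR address component
    //   !IDXEN &&  OFFEN : VADDR holds a byte offset
    //    IDXEN && !OFFEN : VADDR holds an index
    //    IDXEN &&  OFFEN : VADDR holds the index, VADDR + 1 the offset
    //
    // which is why the operand pair flips between (addr0, addr1) and
    // (addr1, addr0) before calcAddr() resolves the per-lane addresses.
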
    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU8>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }

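    // Minimal sketch of the zero-extension performed above for a byte b
    // pulled out of gpuDynInst->d_data:
    //
    //   VecElemU8  b    = ...;             // 8-bit loaded value
    //   VecElemU32 vgpr = (VecElemU32)b;   // upper 24 bits become zero
    //
    // The signed variant below would sign-extend instead.
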
    Inst_MUBUF__BUFFER_LOAD_SBYTE
        ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SBYTE

    Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE

    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_USHORT
        ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_USHORT

    Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_USHORT

    // Untyped buffer load unsigned short (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }

    Inst_MUBUF__BUFFER_LOAD_SSHORT
        ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sshort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SSHORT

    Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT

    // Untyped buffer load signed short (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_DWORD
        ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORD

    Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORD

    // Untyped buffer load dword.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU32>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }

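    // Hedged note on oobMask above: lanes whose computed address falls
    // outside the buffer resource are flagged out-of-bounds during address
    // calculation, and completeAcc() writes 0 to those lanes instead of
    // data from d_data, matching the return-zero-on-out-of-range behavior
    // described for buffer loads.
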
    Inst_MUBUF__BUFFER_LOAD_DWORDX2
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX2

    Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2

    // Untyped buffer load 2 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2 + 1];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
    }

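    // Hedged sketch of the d_data layout implied by the lane * 2 indexing
    // above: the staged data is interleaved per lane,
    //
    //   d_data = { l0.x, l0.y, l1.x, l1.y, ..., lN.x, lN.y }
    //
    // so lane n's first dword sits at index 2n and its second at 2n + 1.
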
    Inst_MUBUF__BUFFER_LOAD_DWORDX3
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX3

    Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3

    // Untyped buffer load 3 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<3>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 2];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
    }

    Inst_MUBUF__BUFFER_LOAD_DWORDX4
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX4

    Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4

    // Untyped buffer load 4 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 2];
                    vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 3];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                    vdst3[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    }

    Inst_MUBUF__BUFFER_STORE_BYTE
        ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_byte")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_BYTE

    Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_BYTE

    // Untyped buffer store byte.
    void
    Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandI8 data(gpuDynInst, extData.VDATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemI8>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

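    // Hedged sketch of the store path above: initiateAcc() first gathers
    // each active lane's register value into the contiguous staging buffer,
    //
    //   d_data[lane] = data[lane];   // one byte per lane for _byte
    //
    // and only then calls initMemWrite<>(), so the memory system sees one
    // packed request rather than a separate write per lane.
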
    Inst_MUBUF__BUFFER_STORE_SHORT
        ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_short")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_SHORT

    Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_SHORT

    // Untyped buffer store short.
    void
    Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandI16 data(gpuDynInst, extData.VDATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemI16>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

35886 Inst_MUBUF__BUFFER_STORE_DWORD::
35887 Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF
*iFmt
)
35888 : Inst_MUBUF(iFmt
, "buffer_store_dword")
35890 setFlag(MemoryRef
);
35892 if (instData
.LDS
) {
35893 setFlag(GroupSegment
);
35895 setFlag(GlobalSegment
);
35897 } // Inst_MUBUF__BUFFER_STORE_DWORD
35899 Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
35901 } // ~Inst_MUBUF__BUFFER_STORE_DWORD

    // Untyped buffer store dword.
    void
    Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data(gpuDynInst, extData.VDATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_DWORDX2
        ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX2

    Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2

    // Untyped buffer store 2 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
                    = data1[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute
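
    // The packing loop above advances d_data by four dwords per lane even
    // though only two are written; the dwordx3/dwordx4 stores below fill
    // lane*4 + 2 and lane*4 + 3 the same way. The apparent invariant is
    // that d_data reserves a four-dword slot per lane, sized for the widest
    // store (inferred from the surrounding code, not documented here).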

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_DWORDX3
        ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX3

    Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3

    // Untyped buffer store 3 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
                    = data2[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<3>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_DWORDX4
        ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX4

    Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4

    // Untyped buffer store 4 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
        ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();
        data3.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
                    = data2[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 3]
                    = data3[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD
        ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
    {
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    // Store one DWORD from LDS memory to system memory without utilizing
    // VGPRs.
    void
    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1")
    {
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1

    Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1

    // Write back and invalidate the shader L1.
    // Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;

            wf->outstandingReqsWrGm++;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc
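
    // initiateAcc() models the writeback/invalidate as a fence packet
    // rather than a data access: injectGlobalMemFence() is what actually
    // pushes the synchronization request into the memory system on behalf
    // of this instruction.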

    void
    Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_WBINVL1_VOL
        ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol")
    {
        /**
         * This instruction is the same as buffer_wbinvl1, except that it
         * only invalidates L1 shader lines whose MTYPE marks them for
         * system or group coherence. Since our L1 does not differentiate
         * between its cache lines, this instruction currently behaves (and
         * is implemented) exactly like buffer_wbinvl1.
         */
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1_VOL

    Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL

    // Write back and invalidate the shader L1 only for lines that are
    // marked volatile. Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;

            wf->outstandingReqsWrGm++;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_ATOMIC_SWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP
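
    // Every buffer atomic below follows this GLC convention: GLC=1 makes
    // the atomic return the pre-operation memory value to the destination
    // VGPRs (AtomicReturn), while GLC=0 discards it (AtomicNoReturn). Only
    // the operation flag (AtomicExch, AtomicCAS, AtomicAdd, ...) differs
    // between the variants.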

    Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
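
    // Worked example of the compare-swap pseudo-code above: if MEM[ADDR]
    // is 5, src (DATA[0]) is 9, and cmp (DATA[1]) is 5, the swap succeeds,
    // MEM[ADDR] becomes 9, and RETURN_DATA[0] receives the old value 5. If
    // cmp had been 4 instead, memory would stay at 5 and RETURN_DATA[0]
    // would still report 5, letting software detect the failed swap.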

    Inst_MUBUF__BUFFER_ATOMIC_ADD
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD

    Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
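
    // None of these buffer atomics are implemented yet. An implementation
    // would presumably mirror the buffer stores above: compute per-lane
    // addresses with calcAddr(), stage DATA into d_data, and issue the
    // operation through the global memory pipe, with the memory system
    // performing, e.g. for this instruction,
    //
    //   old = MEM[ADDR]; MEM[ADDR] = old + DATA; // RETURN_DATA = old
    //
    // atomically. This is a sketch of intent, not the committed
    // implementation.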

    Inst_MUBUF__BUFFER_ATOMIC_SUB
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB

    Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMIN

    Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMIN

    Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMAX

    Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMAX

    Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_AND
        ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_AND

    Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_OR
        ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_OR

    Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_XOR
        ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_XOR

    Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_INC
        ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_INC

    Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
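
    // The inc/dec atomics are wrapping counters rather than plain add/sub.
    // Example for the increment above with DATA = 3:
    //   tmp = 2  ->  MEM[ADDR] = 3   (2 < 3, so increment)
    //   tmp = 3  ->  MEM[ADDR] = 0   (3 >= 3, so wrap to zero)
    // RETURN_DATA always receives the original tmp.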

    Inst_MUBUF__BUFFER_ATOMIC_DEC
        ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_DEC

    Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    // tmp = MEM[ADDR];
    // src = DATA[0:1];
    // cmp = DATA[2:3];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2

    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2

    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2

    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2

    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_AND_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2

    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_OR_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2

    Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2

    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_INC_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2

    Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2

    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X

    // Typed buffer load 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
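
    // MTBUF ("typed") buffer accesses differ from the MUBUF operations
    // above in that the data format and numeric type come from the
    // instruction encoding (DFMT/NFMT) rather than from the buffer resource
    // descriptor, with conversion applied in flight. None of that machinery
    // is modeled here: the tbuffer instructions in this section only record
    // their flags and panic if executed.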

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY

    // Typed buffer load 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ

    // Typed buffer load 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW

    // Typed buffer load 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_X
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X

    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X

    // Typed buffer store 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY

    // Typed buffer store 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ

    // Typed buffer store 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW

    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::
        ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X

    // Typed buffer load 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY

    // Typed buffer load 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ

    // Typed buffer load 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW

    // Typed buffer load 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X

    // Typed buffer store 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY

    // Typed buffer store 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    // Typed buffer store 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
        GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD

    Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
    {
    } // ~Inst_MIMG__IMAGE_LOAD

    // Image memory load with format conversion specified in T#.
    void
    Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
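
    // The MIMG image instructions that follow are decode-only stubs as
    // well: each constructor records the flags the pipeline needs to
    // classify the instruction, while execute() panics if a kernel
    // actually issues one.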

    Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP

    Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP

    void
    Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK

    Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK

    void
    Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN

    // Image memory load with no format conversion and sign extension.
    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK

    // Image memory load with user-supplied mip level, no format conversion.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    // Image memory load with user-supplied mip level, no format conversion.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE

    Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
    {
    } // ~Inst_MIMG__IMAGE_STORE

    // Image memory store with format conversion specified in T#.
    void
    Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP

    Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP

    void
    Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_PCK

    Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_PCK

    // Image memory store of packed data without format conversion.
    void
    Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP_PCK

    Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK

    // Image memory store of packed data without format conversion.
    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_resinfo")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_RESINFO

    Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
    {
    } // ~Inst_MIMG__IMAGE_GET_RESINFO

    void
    Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SWAP

    Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_ADD

    Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SUB

    Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMIN

    Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMIN

    Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMAX

    Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
        InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMAX

    Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_AND

    Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_OR

    Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_XOR

    Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_INC

    Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_DEC

    Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
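    /*
     * Worked example of the wrapping inc/dec semantics above (illustrative
     * values only): for image_atomic_inc with DATA = 5, a location holding
     * 5 wraps to 0 (tmp >= DATA), while a location holding 3 becomes 4.
     * For image_atomic_dec with DATA = 5, a location holding 0 or anything
     * greater than 5 is reset to 5; otherwise it becomes tmp - 1. Both
     * comparisons are unsigned, per the spec comments above.
     */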
    Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE

    Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE

    void
    Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL

    Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D

    Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D

    void
    Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL

    Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L

    Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L

    void
    Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B

    Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B

    void
    Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL

    Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ

    Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C

    Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C

    void
    Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D

    Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L

    Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B

    Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_O

    Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_O

    void
    Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_O

    Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L_O

    Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_O

    Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ_O

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_O

    Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L_O

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_O

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4

    Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4

    void
    Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_CL

    Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_CL

    void
    Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_L

    Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_L

    void
    Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B

    Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B

    void
    Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_CL

    Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_CL

    void
    Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_LZ

    Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_LZ

    void
    Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C

    Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C

    void
    Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_CL

    Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_CL

    void
    Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_L

    Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_L

    void
    Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B

    Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B

    void
    Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_CL

    Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_LZ

    Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ

    void
    Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_O

    Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_O

    void
    Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_CL_O

    Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_L_O

    Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_L_O

    void
    Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_O

    Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_O

    void
    Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_CL_O

    Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_LZ_O

    Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O

    void
    Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_O

    Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_CL_O

    Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_L_O

    Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_O

    Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O

    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_lod")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_LOD

    Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD()
    {
    } // ~Inst_MIMG__IMAGE_GET_LOD

    void
    Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD

    Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD

    void
    Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_CL

    Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD

    Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_O

    Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O

    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt)
        : Inst_EXP(iFmt, "exp")
    {
    } // Inst_EXP__EXP

    Inst_EXP__EXP::~Inst_EXP__EXP()
    {
    } // ~Inst_EXP__EXP

    void
    Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_UBYTE

    Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE()
    {
    } // ~Inst_FLAT__FLAT_LOAD_UBYTE

    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    } // completeAcc
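    /*
     * Sketch of the three-phase flow the implemented FLAT accesses follow
     * (an editorial summary under the assumption of the surrounding gem5
     * GPU model; all names are ones already used in this file):
     *
     *   execute()     - at issue: reads the per-lane 64-bit addresses,
     *                   calls calcAddr(), and hands the request to
     *                   globalMemoryPipe; no data moves yet.
     *   initiateAcc() - when the access starts: initMemRead<T>() (or
     *                   initMemWrite<T>() for stores) moves data between
     *                   memory and the staging buffer d_data.
     *   completeAcc() - when the response returns: d_data is unpacked into
     *                   the destination VGPR(s) under exec_mask.
     */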
    // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---

    Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_SBYTE

    Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE()
    {
    } // ~Inst_FLAT__FLAT_LOAD_SBYTE

    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_USHORT

    Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT()
    {
    } // ~Inst_FLAT__FLAT_LOAD_USHORT

    // Untyped buffer load unsigned short (zero extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    } // completeAcc
    Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_sshort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_SSHORT

    Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT()
    {
    } // ~Inst_FLAT__FLAT_LOAD_SSHORT

    // Untyped buffer load signed short (sign extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORD

    Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORD

    // Untyped buffer load dword.
    void
    Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
    Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX2

    Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX2

    // Untyped buffer load 2 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU64>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
    Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX3

    Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX3

    // Untyped buffer load 3 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<3>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 1];
                vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 2];
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
    } // completeAcc
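    /*
     * Illustrative layout note for the dwordx3 unpacking above: d_data is
     * packed lane-major, three consecutive dwords per lane, so for lane n
     *
     *     d_data[n * 3 + 0] -> vdst0[n]
     *     d_data[n * 3 + 1] -> vdst1[n]
     *     d_data[n * 3 + 2] -> vdst2[n]
     *
     * flat_load_dwordx4 below follows the same scheme with a stride of 4,
     * and the dwordx3/x4 stores invert the mapping.
     */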
    Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX4

    Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX4

    // Untyped buffer load 4 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1];
                vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2];
                vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3];
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    } // completeAcc
    Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_byte")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_BYTE

    Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE()
    {
    } // ~Inst_FLAT__FLAT_STORE_BYTE

    // Untyped buffer store byte.
    void
    Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU8 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
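    /*
     * Note (editorial): the stores mirror the loads. initiateAcc() stages
     * the source VGPR values into d_data under exec_mask and then calls
     * initMemWrite<T>(), so by the time the write completes there is no
     * register state left to update and completeAcc() is empty by design.
     * The same shape repeats for the wider stores below.
     */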
    Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_short")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_SHORT

    Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT()
    {
    } // ~Inst_FLAT__FLAT_STORE_SHORT

    // Untyped buffer store short.
    void
    Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU16 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORD

    Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORD

    // Untyped buffer store dword.
    void
    Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX2

    Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX2

    // Untyped buffer store 2 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU64>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX3

    Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX3

    // Untyped buffer store 3 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);

        data0.read();
        data1.read();
        data2.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3] = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
            }
        }

        initMemWrite<3>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX4

    Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX4

    // Untyped buffer store 4 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
        ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);

        data0.read();
        data1.read();
        data2.read();
        data3.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4] = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
            }
        }

        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SWAP

    Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            // TODO: additional address computation required for scratch
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();

        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc
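    /*
     * Editorial note on the FLAT atomic buffers (an inferred summary of
     * the convention already used in this file): a_data carries the
     * operand sent to memory, x_data carries the swap value for cmpswap
     * below, and d_data receives the pre-op memory value. completeAcc()
     * only copies d_data into VDST when the encoding had GLC = 1, i.e.
     * when isAtomicRet() is true.
     */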
40426 // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---
40428 Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40429 ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT
*iFmt
)
40430 : Inst_FLAT(iFmt
, "flat_atomic_cmpswap")
40432 setFlag(AtomicCAS
);
40433 if (instData
.GLC
) {
40434 setFlag(AtomicReturn
);
40436 setFlag(AtomicNoReturn
);
40438 setFlag(MemoryRef
);
40439 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40441 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP()
40443 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40445 // tmp = MEM[ADDR];
40448 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
40449 // RETURN_DATA[0] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
        ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);

        addr.read();
        data.read();
        cmp.read();
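
        // Note (editorial): register mapping for the CAS: DATA holds the
        // swap value (src) and DATA + 1 the compare value. In the loop
        // below, src is staged into x_data and cmp into a_data for the
        // memory pipeline (reconstructed; the assignments were elided in
        // the damaged source).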
        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
                    = data[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = cmp[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            /**
             * TODO: If you encounter this panic, just remove this panic
             * and restart the simulation. It should just work fine but
             * this is to warn user that this path is never tested although
             * all the necessary logic is implemented
             */
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_ADD

    Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SUB

    Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMIN

    Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
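
    // Note (editorial): the min/max and bitwise and/or/xor FLAT atomics
    // below are decoded and flagged, but this model does not implement
    // their execute(); panicUnimplemented() aborts the simulation if one
    // is encountered.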

    Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMIN

    Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMAX

    Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMAX

    Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_AND

    Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_OR

    Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_XOR

    Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_INC

    Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
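    // Example (editorial): with DATA = 3 the location cycles
    // 0 -> 1 -> 2 -> 3 -> 0, i.e. an increment modulo DATA + 1.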
    void
    Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_DEC

    Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
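    // Example (editorial): with DATA = 3 the location cycles
    // 3 -> 2 -> 1 -> 0 -> 3, reloading DATA whenever tmp is zero or
    // exceeds DATA.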
    void
    Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2

    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2

    // tmp = MEM[ADDR];
    // src = DATA[0:1];
    // cmp = DATA[2:3];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0:1] = tmp.
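    // Note (editorial): the _X2 variants operate on 64-bit values held in
    // register pairs; here DATA names the source pair and DATA + 2 the
    // compare pair, mirroring the 32-bit CMPSWAP's DATA/DATA + 1 layout.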
    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
        ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);

        addr.read();
        data.read();
        cmp.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
                    = data[lane];
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = cmp[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            /**
             * TODO: If you encounter this panic, just remove this panic
             * and restart the simulation. It should just work fine but
             * this is to warn user that this path is never tested although
             * all the necessary logic is implemented
             */
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_add_x2")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_ADD_X2

    Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_sub_x2")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SUB_X2

    Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2

    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2

    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2

    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2

    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_and_x2")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_AND_X2

    Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_or_x2")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_OR_X2

    Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_xor_x2")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_XOR_X2

    Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_inc_x2")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_INC_X2

    Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

    Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_dec_x2")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_DEC_X2

    Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    } // initiateAcc

    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    } // completeAcc

} // namespace Gcn3ISA