/*
 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "arch/gcn3/insts/instructions.hh"

#include <algorithm>
#include <cmath>

#include "arch/gcn3/insts/inst_util.hh"
#include "debug/GCN3.hh"
#include "debug/GPUSync.hh"
#include "gpu-compute/shader.hh"

namespace Gcn3ISA
{
    Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_add_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADD_U32

    Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32()
    {
    } // ~Inst_SOP2__S_ADD_U32

    // D.u = S0.u + S1.u;
    // SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned
    // overflow/carry-out.
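    // For example, 0xffffffff + 0x1 wraps D.u to 0x0, while the widened
    // 64-bit sum, 0x100000000, reaches 2^32 and so sets SCC to 1.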
    void
    Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() + src1.rawData();
        scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
            >= 0x100000000ULL ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_sub_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUB_U32

    Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32()
    {
    } // ~Inst_SOP2__S_SUB_U32

    // D.u = S0.u - S1.u;
    // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out.
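    // For example, 0x0 - 0x1 wraps D.u to 0xffffffff and sets SCC to 1,
    // since S1.u > S0.u signals the borrow.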
    void
    Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() - src1.rawData();
        scc = (src1.rawData() > src0.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_add_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADD_I32

    Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32()
    {
    } // ~Inst_SOP2__S_ADD_I32

    // D.i = S0.i + S1.i;
    // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
    // overflow.
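    // For example, 0x7fffffff + 0x1 gives D.i = 0x80000000: both sources
    // have sign bit 0 but the result has sign bit 1, so SCC is set to 1.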
    void
    Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() + src1.rawData();
        scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
            && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
            ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_sub_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUB_I32

    Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32()
    {
    } // ~Inst_SOP2__S_SUB_I32

    // D.i = S0.i - S1.i;
    // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
    // overflow.
    void
    Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() - src1.rawData();
        scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
            && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_addc_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ADDC_U32

    Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32()
    {
    } // ~Inst_SOP2__S_ADDC_U32

    // D.u = S0.u + S1.u + SCC;
    // SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned
    // carry-out.
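    // Paired with s_add_u32, this can implement 64-bit addition: s_add_u32
    // adds the low words and leaves the carry in SCC, and s_addc_u32 then
    // folds that carry into the sum of the high words.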
    void
    Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = src0.rawData() + src1.rawData() + scc.rawData();
        scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
            + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_subb_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_SUBB_U32

    Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32()
    {
    } // ~Inst_SOP2__S_SUBB_U32

    // D.u = S0.u - S1.u - SCC;
    // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
    void
    Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = src0.rawData() - src1.rawData() - scc.rawData();
        scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_min_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MIN_I32

    Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32()
    {
    } // ~Inst_SOP2__S_MIN_I32

    // D.i = (S0.i < S1.i) ? S0.i : S1.i;
    // SCC = 1 if S0 is chosen as the minimum value.
    void
    Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::min(src0.rawData(), src1.rawData());
        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_min_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MIN_U32

    Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32()
    {
    } // ~Inst_SOP2__S_MIN_U32

    // D.u = (S0.u < S1.u) ? S0.u : S1.u;
    // SCC = 1 if S0 is chosen as the minimum value.
    void
    Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::min(src0.rawData(), src1.rawData());
        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_max_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MAX_I32

    Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32()
    {
    } // ~Inst_SOP2__S_MAX_I32

    // D.i = (S0.i > S1.i) ? S0.i : S1.i;
    // SCC = 1 if S0 is chosen as the maximum value.
    void
    Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::max(src0.rawData(), src1.rawData());
        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_max_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MAX_U32

    Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32()
    {
    } // ~Inst_SOP2__S_MAX_U32

    // D.u = (S0.u > S1.u) ? S0.u : S1.u;
    // SCC = 1 if S0 is chosen as the maximum value.
    void
    Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::max(src0.rawData(), src1.rawData());
        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cselect_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_CSELECT_B32

    Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32()
    {
    } // ~Inst_SOP2__S_CSELECT_B32

    // D.u = SCC ? S0.u : S1.u (conditional select).
    void
    Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = scc.rawData() ? src0.rawData() : src1.rawData();

        sdst.write();
    }

    Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cselect_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_CSELECT_B64

    Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64()
    {
    } // ~Inst_SOP2__S_CSELECT_B64

    // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
    void
    Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();
        scc.read();

        sdst = scc.rawData() ? src0.rawData() : src1.rawData();

        sdst.write();
    }
    Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_and_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_AND_B32

    Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32()
    {
    } // ~Inst_SOP2__S_AND_B32

    // D.u = S0.u & S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_and_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_AND_B64

    Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
    {
    } // ~Inst_SOP2__S_AND_B64

    // D.u64 = S0.u64 & S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() & src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B32

    Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
    {
    } // ~Inst_SOP2__S_OR_B32

    // D.u = S0.u | S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_or_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_OR_B64

    Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
    {
    } // ~Inst_SOP2__S_OR_B64

    // D.u64 = S0.u64 | S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() | src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B32

    Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
    {
    } // ~Inst_SOP2__S_XOR_B32

    // D.u = S0.u ^ S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XOR_B64

    Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
    {
    } // ~Inst_SOP2__S_XOR_B64

    // D.u64 = S0.u64 ^ S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() ^ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B32

    Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
    {
    } // ~Inst_SOP2__S_ANDN2_B32

    // D.u = S0.u & ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() &~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_andn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ANDN2_B64

    Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
    {
    } // ~Inst_SOP2__S_ANDN2_B64

    // D.u64 = S0.u64 & ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() &~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B32

    Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
    {
    } // ~Inst_SOP2__S_ORN2_B32

    // D.u = S0.u | ~S1.u;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() |~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_orn2_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ORN2_B64

    Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
    {
    } // ~Inst_SOP2__S_ORN2_B64

    // D.u64 = S0.u64 | ~S1.u64;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = src0.rawData() |~ src1.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B32

    Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
    {
    } // ~Inst_SOP2__S_NAND_B32

    // D.u = ~(S0.u & S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nand_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NAND_B64

    Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
    {
    } // ~Inst_SOP2__S_NAND_B64

    // D.u64 = ~(S0.u64 & S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() & src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B32

    Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
    {
    } // ~Inst_SOP2__S_NOR_B32

    // D.u = ~(S0.u | S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_nor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_NOR_B64

    Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
    {
    } // ~Inst_SOP2__S_NOR_B64

    // D.u64 = ~(S0.u64 | S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() | src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B32

    Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
    {
    } // ~Inst_SOP2__S_XNOR_B32

    // D.u = ~(S0.u ^ S1.u);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_xnor_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_XNOR_B64

    Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
    {
    } // ~Inst_SOP2__S_XNOR_B64

    // D.u64 = ~(S0.u64 ^ S1.u64);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = ~(src0.rawData() ^ src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B32

    Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
    {
    } // ~Inst_SOP2__S_LSHL_B32

    // D.u = S0.u << S1.u[4:0];
    // SCC = 1 if result is non-zero.
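    // Only the five LSBs of S1 supply the shift count, so a shift amount
    // of 33 (0b100001) behaves as a shift by 1.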
    void
    Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshl_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHL_B64

    Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
    {
    } // ~Inst_SOP2__S_LSHL_B64

    // D.u64 = S0.u64 << S1.u[5:0];
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B32

    Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
    {
    } // ~Inst_SOP2__S_LSHR_B32

    // D.u = S0.u >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_lshr_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_LSHR_B64

    Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
    {
    } // ~Inst_SOP2__S_LSHR_B64

    // D.u64 = S0.u64 >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to zero.
    void
    Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I32

    Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
    {
    } // ~Inst_SOP2__S_ASHR_I32

    // D.i = signext(S0.i) >> S1.u[4:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
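    // For example, S0.i = 0xfffffff0 (-16) shifted right by 2 yields
    // 0xfffffffc (-4). The implementation below relies on the host
    // compiler performing an arithmetic right shift on the signed
    // operand, as mainstream compilers do.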
    void
    Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_ashr_i64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ASHR_I64

    Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
    {
    } // ~Inst_SOP2__S_ASHR_I64

    // D.i64 = signext(S0.i64) >> S1.u[5:0];
    // SCC = 1 if result is non-zero.
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B32

    Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
    {
    } // ~Inst_SOP2__S_BFM_B32

    // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
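    // For example, S0.u = 5 and S1.u = 8 produce ((1 << 5) - 1) << 8 =
    // 0x00001f00, a five-bit-wide mask at bit offset 8.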
    void
    Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
            << bits(src1.rawData(), 4, 0);

        sdst.write();
    }

    Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFM_B64

    Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
    {
    } // ~Inst_SOP2__S_BFM_B64

    // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
    void
    Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
            << bits(src1.rawData(), 5, 0);

        sdst.write();
    }

    Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_mul_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_MUL_I32

    Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
    {
    } // ~Inst_SOP2__S_MUL_I32

    // D.i = S0.i * S1.i.
    void
    Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src0.read();
        src1.read();

        sdst = src0.rawData() * src1.rawData();

        sdst.write();
    }
    Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_u32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_U32

    Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
    {
    } // ~Inst_SOP2__S_BFE_U32

    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
    // field width.
    // D.u = (S0.u >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
    // SCC = 1 if result is non-zero.
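    // For example, S1.u = 0x00050008 selects offset 8 and width 5, so
    // D.u = (S0.u >> 8) & 0x1f; with S0.u = 0x00aabb00 that extracts 0x1b.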
    void
    Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_I32

    Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
    {
    } // ~Inst_SOP2__S_BFE_I32

    // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
    // field width.
    // D.i = (S0.i >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
    // Sign-extend the result;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_u64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_U64

    Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
    {
    } // ~Inst_SOP2__S_BFE_U64

    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
    // field width.
    // D.u64 = (S0.u64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_bfe_i64")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_BFE_I64

    Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
    {
    } // ~Inst_SOP2__S_BFE_I64

    // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
    // field width.
    // D.i64 = (S0.i64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
    // Sign-extend result;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
            & ((1 << bits(src1.rawData(), 22, 16)) - 1);
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_cbranch_g_fork")
    {
        setFlag(Branch);
    } // Inst_SOP2__S_CBRANCH_G_FORK

    Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
    {
    } // ~Inst_SOP2__S_CBRANCH_G_FORK

    // Conditional branch using branch-stack.
    // S0 = compare mask(vcc or any sgpr) and
    // S1 = 64-bit byte address of target instruction.
    void
    Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_absdiff_i32")
    {
        setFlag(ALU);
    } // Inst_SOP2__S_ABSDIFF_I32

    Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
    {
    } // ~Inst_SOP2__S_ABSDIFF_I32

    // D.i = S0.i - S1.i;
    // if (D.i < 0) then D.i = -D.i;
    // SCC = 1 if result is non-zero.
    // Compute the absolute value of difference between two values.
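    // For example, S0.i = 3 and S1.i = 10 give D.i = 7. Note that the
    // difference below is formed in 32-bit signed arithmetic, so operand
    // pairs whose true difference exceeds the int32 range overflow before
    // std::abs is applied.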
    void
    Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        sdst = std::abs(src0.rawData() - src1.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
          InFmt_SOP2 *iFmt)
        : Inst_SOP2(iFmt, "s_rfe_restore_b64")
    {
    } // Inst_SOP2__S_RFE_RESTORE_B64

    Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
    {
    } // ~Inst_SOP2__S_RFE_RESTORE_B64

    // Return from exception handler and continue.
    void
    Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_movk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_MOVK_I32

    Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32()
    {
    } // ~Inst_SOPK__S_MOVK_I32

    // D.i = signext(SIMM16) (sign extension).
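    // For example, SIMM16 = 0x8000 yields D.i = 0xffff8000 (-32768).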
    void
    Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        sdst = simm16;

        sdst.write();
    }

    Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmovk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMOVK_I32

    Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32()
    {
    } // ~Inst_SOPK__S_CMOVK_I32

    // if (SCC) then D.i = signext(SIMM16);
    // else NOP.
    // Conditional move with sign extension.
    void
    Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (scc.rawData()) {
            sdst = simm16;
            sdst.write();
        }
    }
    Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_eq_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_EQ_I32

    Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32()
    {
    } // ~Inst_SOPK__S_CMPK_EQ_I32

    // SCC = (S0.i == signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() == simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lg_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LG_I32

    Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LG_I32

    // SCC = (S0.i != signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() != simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_gt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GT_I32

    Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32()
    {
    } // ~Inst_SOPK__S_CMPK_GT_I32

    // SCC = (S0.i > signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() > simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_ge_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GE_I32

    Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32()
    {
    } // ~Inst_SOPK__S_CMPK_GE_I32

    // SCC = (S0.i >= signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() >= simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lt_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LT_I32

    Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LT_I32

    // SCC = (S0.i < signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_I32

    Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_I32

    // SCC = (S0.i <= signext(SIMM16)).
    void
    Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)sext<16>(instData.SIMM16);
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_eq_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_EQ_U32

    Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
    {
    } // ~Inst_SOPK__S_CMPK_EQ_U32

    // SCC = (S0.u == SIMM16).
    void
    Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() == simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lg_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LG_U32

    Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LG_U32

    // SCC = (S0.u != SIMM16).
    void
    Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() != simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_gt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GT_U32

    Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GT_U32

    // SCC = (S0.u > SIMM16).
    void
    Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() > simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_ge_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_GE_U32

    Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_GE_U32

    // SCC = (S0.u >= SIMM16).
    void
    Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() >= simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_lt_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LT_U32

    Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LT_U32

    // SCC = (S0.u < SIMM16).
    void
    Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() < simm16) ? 1 : 0;

        scc.write();
    }

    Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cmpk_le_u32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_CMPK_LE_U32

    Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
    {
    } // ~Inst_SOPK__S_CMPK_LE_U32

    // SCC = (S0.u <= SIMM16).
    void
    Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
        ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        scc = (src.rawData() <= simm16) ? 1 : 0;

        scc.write();
    }
    Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_addk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_ADDK_I32

    Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
    {
    } // ~Inst_SOPK__S_ADDK_I32

    // D.i = D.i + signext(SIMM16);
    // SCC = overflow.
    void
    Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
        scc = (bits(src.rawData(), 31) == bits(simm16, 15)
            && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_mulk_i32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_MULK_I32

    Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
    {
    } // ~Inst_SOPK__S_MULK_I32

    // D.i = D.i * signext(SIMM16).
    void
    Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        sdst.read();

        sdst = sdst.rawData() * (ScalarRegI32)sext<16>(simm16);

        sdst.write();
    }

    Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_cbranch_i_fork")
    {
        setFlag(Branch);
    } // Inst_SOPK__S_CBRANCH_I_FORK

    Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
    {
    } // ~Inst_SOPK__S_CBRANCH_I_FORK

    // Conditional branch using branch-stack.
    // S0 = compare mask(vcc or any sgpr), and
    // SIMM16 = signed DWORD branch offset relative to next instruction.
    void
    Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_getreg_b32")
    {
    } // Inst_SOPK__S_GETREG_B32

    Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
    {
    } // ~Inst_SOPK__S_GETREG_B32

    // D.u = hardware-reg. Read some or all of a hardware register into the
    // LSBs of D.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
    void
    Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_setreg_b32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_SETREG_B32

    Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
    {
    } // ~Inst_SOPK__S_SETREG_B32

    // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
    // register.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
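    // For example, SIMM16 = 0x0801 decodes to hwRegId = 1, offset = 0 and
    // size = 2, so the write mask computed below is 0x3 and only the two
    // LSBs of the hardware register are updated.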
    void
    Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarRegU32 hwregId = simm16 & 0x3f;
        ScalarRegU32 offset = (simm16 >> 6) & 31;
        ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;

        ScalarOperandU32 hwreg(gpuDynInst, hwregId);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        hwreg.read();
        sdst.read();

        // Store value from SDST to part of the hardware register.
        ScalarRegU32 mask = (((1U << size) - 1U) << offset);
        hwreg = ((hwreg.rawData() & ~mask)
                        | ((sdst.rawData() << offset) & mask));
        hwreg.write();

        // set MODE register to control the behavior of single precision
        // floating-point numbers: denormal mode or round mode
        if (hwregId==1 && size==2
            && (offset==4 || offset==0)) {
            warn_once("Be cautious that s_setreg_b32 has no real effect "
                        "on FP modes: %s\n", gpuDynInst->disassemble());
            return;
        }

        // panic if not changing MODE of floating-point numbers
        panicUnimplemented();
    }

    Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
          InFmt_SOPK *iFmt)
        : Inst_SOPK(iFmt, "s_setreg_imm32_b32")
    {
        setFlag(ALU);
    } // Inst_SOPK__S_SETREG_IMM32_B32

    Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
    {
    } // ~Inst_SOPK__S_SETREG_IMM32_B32

    // Write some or all of the LSBs of IMM32 into a hardware register; this
    // instruction requires a 32-bit literal constant.
    // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
    // is 1..32.
    void
    Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI16 simm16 = instData.SIMM16;
        ScalarRegU32 hwregId = simm16 & 0x3f;
        ScalarRegU32 offset = (simm16 >> 6) & 31;
        ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;

        ScalarOperandU32 hwreg(gpuDynInst, hwregId);
        ScalarRegU32 simm32 = extData.imm_u32;
        hwreg.read();

        ScalarRegU32 mask = (((1U << size) - 1U) << offset);
        hwreg = ((hwreg.rawData() & ~mask)
                        | ((simm32 << offset) & mask));
        hwreg.write();

        if (hwregId==1 && size==2
            && (offset==4 || offset==0)) {
            warn_once("Be cautious that s_setreg_imm32_b32 has no real effect "
                        "on FP modes: %s\n", gpuDynInst->disassemble());
            return;
        }

        // panic if not changing MODE of floating-point numbers
        panicUnimplemented();
    }
    Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_B32

    Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
    {
    } // ~Inst_SOP1__S_MOV_B32

    // D.u = S0.u.
    void
    Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_MOV_B64

    Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
    {
    } // ~Inst_SOP1__S_MOV_B64

    // D.u64 = S0.u64.
    void
    Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B32

    Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
    {
    } // ~Inst_SOP1__S_CMOV_B32

    // if (SCC) then D.u = S0.u;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    }

    Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cmov_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_CMOV_B64

    Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
    {
    } // ~Inst_SOP1__S_CMOV_B64

    // if (SCC) then D.u64 = S0.u64;
    // else NOP.
    // Conditional move.
    void
    Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();
        scc.read();

        if (scc.rawData()) {
            sdst = src.rawData();
            sdst.write();
        }
    }

    Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B32

    Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
    {
    } // ~Inst_SOP1__S_NOT_B32

    // D.u = ~S0.u;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();

        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_not_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_NOT_B64

    Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
    {
    } // ~Inst_SOP1__S_NOT_B64

    // D.u64 = ~S0.u64;
    // SCC = 1 if result is non-zero.
    // Bitwise negation.
    void
    Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = ~src.rawData();
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B32

    Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
    {
    } // ~Inst_SOP1__S_WQM_B32

    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
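    // Whole quad mode turns each group of four mask bits into 0xf if any
    // bit in the group is set and 0x0 otherwise; e.g. an input mask of
    // 0x00010100 yields 0x000f0f00.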
    void
    Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_wqm_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_WQM_B64

    Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
    {
    } // ~Inst_SOP1__S_WQM_B64

    // Computes whole quad mode for an active/valid mask.
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wholeQuadMode(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b32")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B32

    Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
    {
    } // ~Inst_SOP1__S_BREV_B32

    // D.u[31:0] = S0.u[0:31] (reverse bits).
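    // For example, 0x00000001 reverses to 0x80000000, while a palindrome
    // such as 0x80000001 maps to itself.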
    void
    Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    }

    Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_brev_b64")
    {
        setFlag(ALU);
    } // Inst_SOP1__S_BREV_B64

    Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
    {
    } // ~Inst_SOP1__S_BREV_B64

    // D.u64[63:0] = S0.u64[0:63] (reverse bits).
    void
    Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = reverseBits(src.rawData());

        sdst.write();
    }
2156 Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1
*iFmt
)
2157 : Inst_SOP1(iFmt
, "s_bcnt0_i32_b32")
2160 } // Inst_SOP1__S_BCNT0_I32_B32
2162 Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
2164 } // ~Inst_SOP1__S_BCNT0_I32_B32
2166 // D.i = CountZeroBits(S0.u);
2167 // SCC = 1 if result is non-zero.
2169 Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2171 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2172 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2173 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2177 sdst
= countZeroBits(src
.rawData());
2178 scc
= sdst
.rawData() ? 1 : 0;
2184 Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1
*iFmt
)
2185 : Inst_SOP1(iFmt
, "s_bcnt0_i32_b64")
2188 } // Inst_SOP1__S_BCNT0_I32_B64
2190 Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
2192 } // ~Inst_SOP1__S_BCNT0_I32_B64
2194 // D.i = CountZeroBits(S0.u64);
2195 // SCC = 1 if result is non-zero.
2197 Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2199 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2200 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2201 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2205 sdst
= countZeroBits(src
.rawData());
2206 scc
= sdst
.rawData() ? 1 : 0;
2212 Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1
*iFmt
)
2213 : Inst_SOP1(iFmt
, "s_bcnt1_i32_b32")
2216 } // Inst_SOP1__S_BCNT1_I32_B32
2218 Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
2220 } // ~Inst_SOP1__S_BCNT1_I32_B32
2222 // D.i = CountOneBits(S0.u);
2223 // SCC = 1 if result is non-zero.
2225 Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2227 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2228 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2229 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2233 sdst
= popCount(src
.rawData());
2234 scc
= sdst
.rawData() ? 1 : 0;
2240 Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1
*iFmt
)
2241 : Inst_SOP1(iFmt
, "s_bcnt1_i32_b64")
2244 } // Inst_SOP1__S_BCNT1_I32_B64
2246 Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
2248 } // ~Inst_SOP1__S_BCNT1_I32_B64
2250 // D.i = CountOneBits(S0.u64);
2251 // SCC = 1 if result is non-zero.
2253 Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2255 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2256 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2257 ScalarOperandU32
scc(gpuDynInst
, REG_SCC
);
2261 sdst
= popCount(src
.rawData());
2262 scc
= sdst
.rawData() ? 1 : 0;
2268 Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1
*iFmt
)
2269 : Inst_SOP1(iFmt
, "s_ff0_i32_b32")
2272 } // Inst_SOP1__S_FF0_I32_B32
2274 Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
2276 } // ~Inst_SOP1__S_FF0_I32_B32
2278 // D.i = FindFirstZero(S0.u);
2279 // If no zeros are found, return -1.
2280 // Returns the bit position of the first zero from the LSB.
2282 Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2284 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2285 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2289 sdst
= findFirstZero(src
.rawData());
2294 Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1
*iFmt
)
2295 : Inst_SOP1(iFmt
, "s_ff0_i32_b64")
2298 } // Inst_SOP1__S_FF0_I32_B64
2300 Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
2302 } // ~Inst_SOP1__S_FF0_I32_B64
2304 // D.i = FindFirstZero(S0.u64);
2305 // If no zeros are found, return -1.
2306 // Returns the bit position of the first zero from the LSB.
2308 Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2310 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2311 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2315 sdst
= findFirstZero(src
.rawData());
2320 Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1
*iFmt
)
2321 : Inst_SOP1(iFmt
, "s_ff1_i32_b32")
2324 } // Inst_SOP1__S_FF1_I32_B32
2326 Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
2328 } // ~Inst_SOP1__S_FF1_I32_B32
2330 // D.i = FindFirstOne(S0.u);
2331 // If no ones are found, return -1.
2332 // Returns the bit position of the first one from the LSB.
2334 Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst
)
2336 ConstScalarOperandU32
src(gpuDynInst
, instData
.SSRC0
);
2337 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2341 sdst
= findFirstOne(src
.rawData());
2346 Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1
*iFmt
)
2347 : Inst_SOP1(iFmt
, "s_ff1_i32_b64")
2350 } // Inst_SOP1__S_FF1_I32_B64
2352 Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
2354 } // ~Inst_SOP1__S_FF1_I32_B64
2356 // D.i = FindFirstOne(S0.u64);
2357 // If no ones are found, return -1.
2358 // Returns the bit position of the first one from the LSB.
2360 Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst
)
2362 ConstScalarOperandU64
src(gpuDynInst
, instData
.SSRC0
);
2363 ScalarOperandI32
sdst(gpuDynInst
, instData
.SDST
);
2367 sdst
= findFirstOne(src
.rawData());
    Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_b32")
    {
    } // Inst_SOP1__S_FLBIT_I32_B32

    Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_B32

    // D.i = FindFirstOne(S0.u);
    // If no ones are found, return -1.
    // Counts how many zeros before the first one starting from the MSB.
    void
    Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = countZeroBitsMsb(src.rawData());

        sdst.write();
    }

    Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_b64")
    {
    } // Inst_SOP1__S_FLBIT_I32_B64

    Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_B64

    // D.i = FindFirstOne(S0.u64);
    // If no ones are found, return -1.
    // Counts how many zeros before the first one starting from the MSB.
    void
    Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = countZeroBitsMsb(src.rawData());

        sdst.write();
    }
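    // For example, the 32-bit form gives 16 for S0.u = 0x0000FFFF (sixteen
    // zeros precede the first one from the MSB) and -1 for S0.u = 0.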
    Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32")
    {
    } // Inst_SOP1__S_FLBIT_I32

    Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
    {
    } // ~Inst_SOP1__S_FLBIT_I32

    // D.i = FirstOppositeSignBit(S0.i);
    // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
    // Counts how many bits in a row (from MSB to LSB) are the same as the
    // sign bit.
    void
    Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = firstOppositeSignBit(src.rawData());

        sdst.write();
    }

    Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_flbit_i32_i64")
    {
    } // Inst_SOP1__S_FLBIT_I32_I64

    Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
    {
    } // ~Inst_SOP1__S_FLBIT_I32_I64

    // D.i = FirstOppositeSignBit(S0.i64);
    // If S0.i64 == 0 or S0.i64 == -1 (all bits are the same), return -1.
    // Counts how many bits in a row (from MSB to LSB) are the same as the
    // sign bit.
    void
    Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = firstOppositeSignBit(src.rawData());

        sdst.write();
    }
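    // For example, S0.i = 0xFFFFFFF0 (-16) has 28 copies of the sign bit
    // before the first opposite bit, so the result is 28; 0 and -1 contain
    // no opposite-sign bit and return -1.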
    Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_sext_i32_i8")
    {
    } // Inst_SOP1__S_SEXT_I32_I8

    Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
    {
    } // ~Inst_SOP1__S_SEXT_I32_I8

    // D.i = signext(S0.i[7:0]) (sign extension).
    void
    Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = sext<std::numeric_limits<ScalarRegI8>::digits>(
            bits(src.rawData(), 7, 0));

        sdst.write();
    }

    Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_sext_i32_i16")
    {
    } // Inst_SOP1__S_SEXT_I32_I16

    Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
    {
    } // ~Inst_SOP1__S_SEXT_I32_I16

    // D.i = signext(S0.i[15:0]) (sign extension).
    void
    Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = sext<std::numeric_limits<ScalarRegI16>::digits>(
            bits(src.rawData(), 15, 0));

        sdst.write();
    }
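    // For example, the I8 form maps S0 = 0x80 to 0xFFFFFF80 (-128) and
    // leaves S0 = 0x7F as 0x0000007F (127); the I16 form behaves the same
    // way on bit 15.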
    Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset0_b32")
    {
    } // Inst_SOP1__S_BITSET0_B32

    Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
    {
    } // ~Inst_SOP1__S_BITSET0_B32

    // D.u[S0.u[4:0]] = 0.
    void
    Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 4, 0), 0);

        sdst.write();
    }

    Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset0_b64")
    {
    } // Inst_SOP1__S_BITSET0_B64

    Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
    {
    } // ~Inst_SOP1__S_BITSET0_B64

    // D.u64[S0.u[5:0]] = 0.
    void
    Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 5, 0), 0);

        sdst.write();
    }

    Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset1_b32")
    {
    } // Inst_SOP1__S_BITSET1_B32

    Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
    {
    } // ~Inst_SOP1__S_BITSET1_B32

    // D.u[S0.u[4:0]] = 1.
    void
    Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 4, 0), 1);

        sdst.write();
    }

    Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_bitset1_b64")
    {
    } // Inst_SOP1__S_BITSET1_B64

    Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
    {
    } // ~Inst_SOP1__S_BITSET1_B64

    // D.u64[S0.u[5:0]] = 1.
    void
    Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();
        sdst.read();

        sdst.setBit(bits(src.rawData(), 5, 0), 1);

        sdst.write();
    }
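    // Note that only the low 5 (32-bit forms) or 6 (64-bit forms) bits of S0
    // select the bit: for example, S0.u = 33 addresses bit 1 of a 32-bit
    // destination, since 33 & 31 == 1.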
    Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_getpc_b64")
    {
    } // Inst_SOP1__S_GETPC_B64

    Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
    {
    } // ~Inst_SOP1__S_GETPC_B64

    // D.u64 = PC + 4.
    // Destination receives the byte address of the next instruction.
    void
    Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        sdst = pc + 4;

        sdst.write();
    }

    Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_setpc_b64")
    {
    } // Inst_SOP1__S_SETPC_B64

    Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
    {
    } // ~Inst_SOP1__S_SETPC_B64

    // PC = S0.u64.
    // S0.u64 is a byte address of the instruction to jump to.
    void
    Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);

        src.read();

        wf->pc(src.rawData());
    }
    Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_swappc_b64")
    {
    } // Inst_SOP1__S_SWAPPC_B64

    Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
    {
    } // ~Inst_SOP1__S_SWAPPC_B64

    // D.u64 = PC + 4; PC = S0.u64.
    // S0.u64 is a byte address of the instruction to jump to.
    void
    Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = pc + 4;

        sdst.write();
        wf->pc(src.rawData());
    }
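    // s_swappc_b64 is the scalar call primitive: the return address (PC + 4)
    // is captured in D while control transfers to S0, and a later
    // s_setpc_b64 on the saved pair returns to the caller.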
    Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_rfe_b64")
    {
    } // Inst_SOP1__S_RFE_B64

    Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
    {
    } // ~Inst_SOP1__S_RFE_B64

    // Return from exception handler and continue.
    void
    Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_and_saveexec_b64")
    {
    } // Inst_SOP1__S_AND_SAVEEXEC_B64

    Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_AND_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 & EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() & wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
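    // The saveexec group underpins structured control flow. As an
    // illustrative sketch (register choices are arbitrary), a compiler
    // might lower "if (vcc) { ... }" to:
    //     s_and_saveexec_b64 s[2:3], vcc  ; save EXEC, mask to taken lanes
    //     ...                             ; then-block
    //     s_mov_b64 exec, s[2:3]          ; restore EXEC
    // with the SCC result used to branch over the block when no lanes
    // remain active.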
    Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_or_saveexec_b64")
    {
    } // Inst_SOP1__S_OR_SAVEEXEC_B64

    Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_OR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 | EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() | wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xor_saveexec_b64")
    {
    } // Inst_SOP1__S_XOR_SAVEEXEC_B64

    Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 ^ EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
    {
    } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 & ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() & ~wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
    {
    } // Inst_SOP1__S_ORN2_SAVEEXEC_B64

    Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = S0.u64 | ~EXEC;
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = src.rawData() | ~wf->execMask().to_ullong();
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nand_saveexec_b64")
    {
    } // Inst_SOP1__S_NAND_SAVEEXEC_B64

    Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 & EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_nor_saveexec_b64")
    {
    } // Inst_SOP1__S_NOR_SAVEEXEC_B64

    Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 | EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
    {
    } // Inst_SOP1__S_XNOR_SAVEEXEC_B64

    Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
    {
    } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64

    // D.u64 = EXEC;
    // EXEC = ~(S0.u64 ^ EXEC);
    // SCC = 1 if the new value of EXEC is non-zero.
    void
    Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = wf->execMask().to_ullong();
        wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
        scc = wf->execMask().any() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b32")
    {
    } // Inst_SOP1__S_QUADMASK_B32

    Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
    {
    } // ~Inst_SOP1__S_QUADMASK_B32

    // D.u = QuadMask(S0.u):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }

    Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_quadmask_b64")
    {
    } // Inst_SOP1__S_QUADMASK_B64

    Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
    {
    } // ~Inst_SOP1__S_QUADMASK_B64

    // D.u64 = QuadMask(S0.u64):
    // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
    // SCC = 1 if result is non-zero.
    void
    Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = quadMask(src.rawData());
        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
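    // For example, S0.u = 0x000000F1 yields D.u = 0x3: quads [3:0] (0x1) and
    // [7:4] (0xF) each contain a set bit, and all higher quads are zero.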
    Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movrels_b32")
    {
    } // Inst_SOP1__S_MOVRELS_B32

    Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
    {
    } // ~Inst_SOP1__S_MOVRELS_B32

    // D.u = SGPR[S0.u + M0.u].u (move from relative source).
    void
    Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData());
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movrels_b64")
    {
    } // Inst_SOP1__S_MOVRELS_B64

    Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64()
    {
    } // ~Inst_SOP1__S_MOVRELS_B64

    // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source).
    // The index in M0.u must be even for this operation.
    void
    Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData());
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movreld_b32")
    {
    } // Inst_SOP1__S_MOVRELD_B32

    Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32()
    {
    } // ~Inst_SOP1__S_MOVRELD_B32

    // SGPR[D.u + M0.u].u = S0.u (move to relative destination).
    void
    Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    }

    Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_movreld_b64")
    {
    } // Inst_SOP1__S_MOVRELD_B64

    Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64()
    {
    } // ~Inst_SOP1__S_MOVRELD_B64

    // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination).
    // The index in M0.u must be even for this operation.
    void
    Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
        m0.read();
        ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData());

        src.read();

        sdst = src.rawData();

        sdst.write();
    }
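    // For example, with M0 = 2 and SSRC0 naming s4, s_movrels_b32 reads s6;
    // the M0 offset is applied when the instruction executes, which is what
    // makes dynamically indexed SGPR access possible.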
    Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_cbranch_join")
    {
    } // Inst_SOP1__S_CBRANCH_JOIN

    Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN()
    {
    } // ~Inst_SOP1__S_CBRANCH_JOIN

    // Conditional branch join point (end of conditional branch block).
    void
    Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_abs_i32")
    {
    } // Inst_SOP1__S_ABS_I32

    Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32()
    {
    } // ~Inst_SOP1__S_ABS_I32

    // if (S.i < 0) then D.i = -S.i;
    // else D.i = S.i;
    // SCC = 1 if result is non-zero.
    // Integer absolute value.
    void
    Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
        ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src.read();

        sdst = std::abs(src.rawData());

        scc = sdst.rawData() ? 1 : 0;

        sdst.write();
        scc.write();
    }
    Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_mov_fed_b32")
    {
    } // Inst_SOP1__S_MOV_FED_B32

    Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32()
    {
    } // ~Inst_SOP1__S_MOV_FED_B32

    void
    Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX(
          InFmt_SOP1 *iFmt)
        : Inst_SOP1(iFmt, "s_set_gpr_idx_idx")
    {
    } // Inst_SOP1__S_SET_GPR_IDX_IDX

    Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX()
    {
    } // ~Inst_SOP1__S_SET_GPR_IDX_IDX

    // M0[7:0] = S0.u[7:0].
    // Modify the index used in vector GPR indexing.
    void
    Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_i32")
    {
    } // Inst_SOPC__S_CMP_EQ_I32

    Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32()
    {
    } // ~Inst_SOPC__S_CMP_EQ_I32

    // SCC = (S0.i == S1.i).
    void
    Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_i32")
    {
    } // Inst_SOPC__S_CMP_LG_I32

    Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32()
    {
    } // ~Inst_SOPC__S_CMP_LG_I32

    // SCC = (S0.i != S1.i).
    void
    Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_gt_i32")
    {
    } // Inst_SOPC__S_CMP_GT_I32

    Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32()
    {
    } // ~Inst_SOPC__S_CMP_GT_I32

    // SCC = (S0.i > S1.i).
    void
    Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_ge_i32")
    {
    } // Inst_SOPC__S_CMP_GE_I32

    Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32()
    {
    } // ~Inst_SOPC__S_CMP_GE_I32

    // SCC = (S0.i >= S1.i).
    void
    Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lt_i32")
    {
    } // Inst_SOPC__S_CMP_LT_I32

    Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32()
    {
    } // ~Inst_SOPC__S_CMP_LT_I32

    // SCC = (S0.i < S1.i).
    void
    Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_le_i32")
    {
    } // Inst_SOPC__S_CMP_LE_I32

    Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32()
    {
    } // ~Inst_SOPC__S_CMP_LE_I32

    // SCC = (S0.i <= S1.i).
    void
    Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_u32")
    {
    } // Inst_SOPC__S_CMP_EQ_U32

    Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32()
    {
    } // ~Inst_SOPC__S_CMP_EQ_U32

    // SCC = (S0.u == S1.u).
    void
    Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_u32")
    {
    } // Inst_SOPC__S_CMP_LG_U32

    Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32()
    {
    } // ~Inst_SOPC__S_CMP_LG_U32

    // SCC = (S0.u != S1.u).
    void
    Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_gt_u32")
    {
    } // Inst_SOPC__S_CMP_GT_U32

    Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32()
    {
    } // ~Inst_SOPC__S_CMP_GT_U32

    // SCC = (S0.u > S1.u).
    void
    Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() > src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_ge_u32")
    {
    } // Inst_SOPC__S_CMP_GE_U32

    Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32()
    {
    } // ~Inst_SOPC__S_CMP_GE_U32

    // SCC = (S0.u >= S1.u).
    void
    Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lt_u32")
    {
    } // Inst_SOPC__S_CMP_LT_U32

    Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32()
    {
    } // ~Inst_SOPC__S_CMP_LT_U32

    // SCC = (S0.u < S1.u).
    void
    Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() < src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_le_u32")
    {
    } // Inst_SOPC__S_CMP_LE_U32

    Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32()
    {
    } // ~Inst_SOPC__S_CMP_LE_U32

    // SCC = (S0.u <= S1.u).
    void
    Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp0_b32")
    {
    } // Inst_SOPC__S_BITCMP0_B32

    Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32()
    {
    } // ~Inst_SOPC__S_BITCMP0_B32

    // SCC = (S0.u[S1.u[4:0]] == 0).
    void
    Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b32")
    {
    } // Inst_SOPC__S_BITCMP1_B32

    Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32()
    {
    } // ~Inst_SOPC__S_BITCMP1_B32

    // SCC = (S0.u[S1.u[4:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp0_b64")
    {
    } // Inst_SOPC__S_BITCMP0_B64

    Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64()
    {
    } // ~Inst_SOPC__S_BITCMP0_B64

    // SCC = (S0.u64[S1.u[5:0]] == 0).
    void
    Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_bitcmp1_b64")
    {
    } // Inst_SOPC__S_BITCMP1_B64

    Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64()
    {
    } // ~Inst_SOPC__S_BITCMP1_B64

    // SCC = (S0.u64[S1.u[5:0]] == 1).
    void
    Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;

        scc.write();
    }
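    // For example, with S0.u = 0x00000010 and S1.u = 4, s_bitcmp1_b32 sets
    // SCC = 1 (bit 4 of S0 is set) and s_bitcmp0_b32 sets SCC = 0.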
    Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_setvskip")
    {
        setFlag(UnconditionalJump);
    } // Inst_SOPC__S_SETVSKIP

    Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP()
    {
    } // ~Inst_SOPC__S_SETVSKIP

    // VSKIP = S0.u[S1.u[4:0]].
    // Enables and disables VSKIP mode.
    // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instructions
    // are issued.
    void
    Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_set_gpr_idx_on")
    {
    } // Inst_SOPC__S_SET_GPR_IDX_ON

    Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON()
    {
    } // ~Inst_SOPC__S_SET_GPR_IDX_ON

    // MODE.gpr_idx_en = 1;
    // M0[7:0] = S0.u[7:0];
    // M0[15:12] = SIMM4 (direct contents of S1 field);
    // Remaining bits of M0 are unmodified.
    // Enable GPR indexing mode. Vector operations after this will perform
    // relative GPR addressing based on the contents of M0.
    // The raw contents of the S1 field are read and used to set the enable
    // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and
    // S1[3] = VDST_REL.
    void
    Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_eq_u64")
    {
    } // Inst_SOPC__S_CMP_EQ_U64

    Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64()
    {
    } // ~Inst_SOPC__S_CMP_EQ_U64

    // SCC = (S0.i64 == S1.i64).
    void
    Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() == src1.rawData()) ? 1 : 0;

        scc.write();
    }

    Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt)
        : Inst_SOPC(iFmt, "s_cmp_lg_u64")
    {
    } // Inst_SOPC__S_CMP_LG_U64

    Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64()
    {
    } // ~Inst_SOPC__S_CMP_LG_U64

    // SCC = (S0.i64 != S1.i64).
    void
    Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
        ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
        ScalarOperandU32 scc(gpuDynInst, REG_SCC);

        src0.read();
        src1.read();

        scc = (src0.rawData() != src1.rawData()) ? 1 : 0;

        scc.write();
    }
    Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_nop")
    {
    } // Inst_SOPP__S_NOP

    Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP()
    {
    } // ~Inst_SOPP__S_NOP

    void
    Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    }
    Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm")
    {
        setFlag(EndOfKernel);
    } // Inst_SOPP__S_ENDPGM

    Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM()
    {
    } // ~Inst_SOPP__S_ENDPGM

    // End of program; terminate wavefront.
    void
    Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        // delete extra instructions fetched for completed work-items
        wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
            wf->instructionBuffer.end());

        if (wf->pendingFetch) {
            wf->dropFetch = true;
        }

        wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
            .flushBuf(wf->wfSlotId);
        wf->setStatus(Wavefront::S_STOPPED);

        int refCount = wf->computeUnit->getLds()
            .decreaseRefCounter(wf->dispatchId, wf->wgId);

        /**
         * The parent WF of this instruction is exiting, therefore
         * it should not participate in this barrier any longer. This
         * prevents possible deadlock issues if WFs exit early.
         */
        int bar_id = WFBarrier::InvalidID;
        if (wf->hasBarrier()) {
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            bar_id = wf->barrierId();
            assert(bar_id != WFBarrier::InvalidID);
            wf->releaseBarrier();
            cu->decMaxBarrierCnt(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
                    "program and decrementing max barrier count for "
                    "barrier Id%d. New max count: %d.\n", cu->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
                    cu->maxBarrierCnt(bar_id));
        }

        DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
                wf->computeUnit->cu_id, wf->wgId, refCount);

        wf->computeUnit->registerManager->freeRegisters(wf);
        wf->computeUnit->stats.completedWfs++;
        wf->computeUnit->activeWaves--;

        panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
                 "than zero\n", wf->computeUnit->cu_id);

        DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
                wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId,
                wf->wfDynId);

        for (int i = 0; i < wf->vecReads.size(); i++) {
            if (wf->rawDist.find(i) != wf->rawDist.end()) {
                wf->stats.readsPerWrite.sample(wf->vecReads.at(i));
            }
        }
        wf->vecReads.clear();
        wf->rawDist.clear();
        wf->lastInstExec = 0;

        if (!refCount) {
            /**
             * If all WFs have finished, and hence the WG has finished,
             * then we can free up the barrier belonging to the parent
             * WG, but only if we actually used a barrier (i.e., more
             * than one WF in the WG).
             */
            if (bar_id != WFBarrier::InvalidID) {
                DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
                        "now complete. Releasing barrier Id%d.\n", cu->cu_id,
                        wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id);
                cu->releaseBarrier(bar_id);
            }

            /**
             * Last wavefront of the workgroup has executed return. If the
             * workgroup is not the final one in the kernel, then simply
             * retire it; however, if it is the final one (i.e., indicating
             * the kernel end) then release operation is needed.
             */

            // check whether the workgroup is indicating the kernel end (i.e.,
            // the last workgroup in the kernel).
            bool kernelEnd =
                wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
            // further check whether 'release @ kernel end' is needed
            bool relNeeded =
                wf->computeUnit->shader->impl_kern_end_rel;

            // if not a kernel end or no release needed, retire the workgroup
            if (!kernelEnd || !relNeeded) {
                wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
                wf->setStatus(Wavefront::S_STOPPED);
                wf->computeUnit->stats.completedWGs++;

                return;
            }

            /**
             * If a kernel end and release needed, inject a memory sync and
             * retire the workgroup after receiving all acks.
             */
            setFlag(MemSync);
            setFlag(GlobalSegment);
            // Notify Memory System of Kernel Completion
            wf->setStatus(Wavefront::S_RETURNING);
            gpuDynInst->simdId = wf->simdId;
            gpuDynInst->wfSlotId = wf->wfSlotId;
            gpuDynInst->wfDynId = wf->wfDynId;

            DPRINTF(GPUExec, "inject global memory fence for CU%d: "
                    "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
                    wf->simdId, wf->wfSlotId, wf->wfDynId);

            // call shader to prepare the flush operations
            wf->computeUnit->shader->prepareFlush(gpuDynInst);

            wf->computeUnit->stats.completedWGs++;
        } else {
            wf->computeUnit->shader->dispatcher().scheduleDispatch();
        }
    } // execute
    Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_branch")
    {
    } // Inst_SOPP__S_BRANCH

    Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
    {
    } // ~Inst_SOPP__S_BRANCH

    // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
    void
    Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;

        pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;

        wf->pc(pc);
    }
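    // For example, SIMM16 = 0xFFFF (-1) gives PC + (-4) + 4 = PC, a branch
    // to the s_branch itself, while SIMM16 = 0 continues at the next
    // instruction (PC + 4).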
    Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_wakeup")
    {
    } // Inst_SOPP__S_WAKEUP

    Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
    {
    } // ~Inst_SOPP__S_WAKEUP

    // Allow a wave to wakeup all the other waves in its workgroup to force
    // them to wake up immediately from an S_SLEEP instruction. The wakeup is
    // ignored if the waves are not sleeping.
    void
    Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc0")
    {
    } // Inst_SOPP__S_CBRANCH_SCC0

    Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC0

    // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (!scc.rawData()) {
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_scc1")
    {
    } // Inst_SOPP__S_CBRANCH_SCC1

    Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1()
    {
    } // ~Inst_SOPP__S_CBRANCH_SCC1

    // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ScalarRegI16 simm16 = instData.SIMM16;
        ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);

        scc.read();

        if (scc.rawData()) {
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccz")
    {
    } // Inst_SOPP__S_CBRANCH_VCCZ

    Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCZ

    // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
        ScalarRegI16 simm16 = instData.SIMM16;

        vcc.read();

        if (!vcc.rawData()) {
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_vccnz")
    {
    } // Inst_SOPP__S_CBRANCH_VCCNZ

    Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_VCCNZ

    // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        vcc.read();

        if (vcc.rawData()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execz")
    {
    } // Inst_SOPP__S_CBRANCH_EXECZ

    Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECZ

    // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();

        if (wf->execMask().none()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }

    Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_execnz")
    {
    } // Inst_SOPP__S_CBRANCH_EXECNZ

    Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ()
    {
    } // ~Inst_SOPP__S_CBRANCH_EXECNZ

    // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
    void
    Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        Addr pc = wf->pc();

        if (wf->execMask().any()) {
            ScalarRegI16 simm16 = instData.SIMM16;
            pc = pc + ((ScalarRegI64)sext<18>(simm16 * 4LL)) + 4LL;
        }

        wf->pc(pc);
    }
    Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_barrier")
    {
        setFlag(MemBarrier);
    } // Inst_SOPP__S_BARRIER

    Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER()
    {
    } // ~Inst_SOPP__S_BARRIER

    /**
     * Synchronize waves within a workgroup. If not all waves of the
     * workgroup have been created yet, wait for entire group before
     * proceeding. If some waves in the workgroup have already terminated,
     * this waits on only the surviving waves.
     */
    void
    Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ComputeUnit *cu = gpuDynInst->computeUnit();

        if (wf->hasBarrier()) {
            int bar_id = wf->barrierId();
            assert(wf->getStatus() != Wavefront::S_BARRIER);
            wf->setStatus(Wavefront::S_BARRIER);
            cu->incNumAtBarrier(bar_id);
            DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
                    "barrier Id%d. %d waves now at barrier, %d waves "
                    "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
                    wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
                    cu->numYetToReachBarrier(bar_id));
        }
    } // execute
    // --- Inst_SOPP__S_SETKILL class methods ---

    Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setkill")
    {
    } // Inst_SOPP__S_SETKILL

    Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
    {
    } // ~Inst_SOPP__S_SETKILL

    void
    Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_waitcnt")
    {
    } // Inst_SOPP__S_WAITCNT

    Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
    {
    } // ~Inst_SOPP__S_WAITCNT

    // Wait for the counts of outstanding lds, vector-memory and
    // export/vmem-write-data to be at or below the specified levels.
    // SIMM16[3:0] = vmcount (vector memory operations),
    // SIMM16[6:4] = export/mem-write-data count,
    // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
    void
    Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 vm_cnt = 0;
        ScalarRegI32 exp_cnt = 0;
        ScalarRegI32 lgkm_cnt = 0;
        vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0);
        exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4);
        lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8);
        gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
    }
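    // For example, SIMM16 = 0x0070 decodes to vm_cnt = 0, exp_cnt = 7 and
    // lgkm_cnt = 0: the wave waits for all vector-memory and
    // scalar/LDS/GDS operations, while the export count is left at its
    // maximum encoding and is effectively not waited on.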
    Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sethalt")
    {
    } // Inst_SOPP__S_SETHALT

    Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
    {
    } // ~Inst_SOPP__S_SETHALT

    void
    Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sleep")
    {
    } // Inst_SOPP__S_SLEEP

    Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
    {
    } // ~Inst_SOPP__S_SLEEP

    // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
    void
    Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
    {
        ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
        gpuDynInst->wavefront()->setStatus(Wavefront::S_STALLED_SLEEP);
        // sleep duration is specified in multiples of 64 cycles
        gpuDynInst->wavefront()->setSleepTime(64 * simm16);
    }
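    // For example, SIMM16 = 2 requests roughly 128 cycles of sleep; note
    // that this implementation scales the full immediate, whereas the ISA
    // text above specifies only SIMM16[2:0].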
    // --- Inst_SOPP__S_SETPRIO class methods ---

    Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_setprio")
    {
    } // Inst_SOPP__S_SETPRIO

    Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
    {
    } // ~Inst_SOPP__S_SETPRIO

    // User settable wave priority is set to SIMM16[1:0]. 0 = lowest,
    // 3 = highest.
    void
    Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsg")
    {
    } // Inst_SOPP__S_SENDMSG

    Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
    {
    } // ~Inst_SOPP__S_SENDMSG

    void
    Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_sendmsghalt")
    {
    } // Inst_SOPP__S_SENDMSGHALT

    Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT()
    {
    } // ~Inst_SOPP__S_SENDMSGHALT

    void
    Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_trap")
    {
    } // Inst_SOPP__S_TRAP

    Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP()
    {
    } // ~Inst_SOPP__S_TRAP

    // Enter the trap handler.
    void
    Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_icache_inv")
    {
    } // Inst_SOPP__S_ICACHE_INV

    Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
    {
    } // ~Inst_SOPP__S_ICACHE_INV

    // Invalidate entire L1 instruction cache.
    void
    Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_incperflevel")
    {
    } // Inst_SOPP__S_INCPERFLEVEL

    Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
    {
    } // ~Inst_SOPP__S_INCPERFLEVEL

    void
    Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_decperflevel")
    {
    } // Inst_SOPP__S_DECPERFLEVEL

    Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
    {
    } // ~Inst_SOPP__S_DECPERFLEVEL

    void
    Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_ttracedata")
    {
    } // Inst_SOPP__S_TTRACEDATA

    Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
    {
    } // ~Inst_SOPP__S_TTRACEDATA

    void
    Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGSYS

    Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGUSER

    Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGUSER

    void
    Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
        Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
        ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
        Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
    {
    } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
        ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
    {
    } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER

    void
    Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_endpgm_saved")
    {
    } // Inst_SOPP__S_ENDPGM_SAVED

    Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
    {
    } // ~Inst_SOPP__S_ENDPGM_SAVED

    void
    Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_OFF

    Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_OFF

    // MODE.gpr_idx_en = 0.
    // Clear GPR indexing mode. Vector operations after this will not perform
    // relative GPR addressing regardless of the contents of M0.
    void
    Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
          InFmt_SOPP *iFmt)
        : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
    {
    } // Inst_SOPP__S_SET_GPR_IDX_MODE

    Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
    {
    } // ~Inst_SOPP__S_SET_GPR_IDX_MODE

    // M0[15:12] = SIMM4.
    // Modify the mode used for vector GPR indexing.
    // The raw contents of the source field are read and used to set the
    // enable bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL,
    // SIMM4[2] = VSRC2_REL and SIMM4[3] = VDST_REL.
    void
    Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
    Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dword")
    {
    } // Inst_SMEM__S_LOAD_DWORD

    Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_LOAD_DWORD

    /**
     * Read 1 dword from scalar data cache. If the offset is specified as an
     * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are
     * ignored). If the offset is specified as an immediate 20-bit constant,
     * the constant is an unsigned byte offset.
     */
    void
    Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
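    // Note that SBASE encodes an aligned SGPR-pair number, so
    // instData.SBASE << 1 converts it to the first SGPR of the pair: for
    // example, SBASE = 2 takes the 64-bit base address from s[4:5].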
    Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx2")
    {
    } // Inst_SMEM__S_LOAD_DWORDX2

    Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX2

    /**
     * Read 2 dwords from scalar data cache. See s_load_dword for details on
     * the offset input.
     */
    void
    Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx4")
    {
    } // Inst_SMEM__S_LOAD_DWORDX4

    Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX4

    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx8")
    {
    } // Inst_SMEM__S_LOAD_DWORDX8

    Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX8

    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_load_dwordx16")
    {
    } // Inst_SMEM__S_LOAD_DWORDX16

    Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_LOAD_DWORDX16

    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORD

    Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD

    // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 1 request, size 32
        ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
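
    // The buffer variants here and below differ from the plain scalar loads
    // only in addressing: SBASE names four SGPRs holding a 128-bit buffer
    // resource descriptor rather than a raw 64-bit pointer, so calcAddr()
    // is handed rsrcDesc and derives the address from the descriptor's base
    // plus the byte offset.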
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2

    // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // use U64 because 2 requests, each size 32
        ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4

    // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 4 requests, each size 32
        ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8

    // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<8>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 8 requests, each size 32
        ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
    {
    } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16

    // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);

        rsrcDesc.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, rsrcDesc, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe
            .getGMReqFIFO().push(gpuDynInst);

        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<16>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        // 16 requests, each size 32
        ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
        sdst.write();
    } // completeAcc
    Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORD

    Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_STORE_DWORD

    // Write 1 dword to scalar data cache.
    // If the offset is specified as an SGPR, the SGPR contains an unsigned
    // BYTE offset (the 2 LSBs are ignored).
    // If the offset is specified as an immediate 20-bit constant, the
    // constant is an unsigned BYTE offset.
    void
    Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU32));
        initMemWrite<1>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
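
    // Illustrative arithmetic: one dword is sizeof(ScalarRegU32) == 4 bytes,
    // so s_store_dword stages 4 bytes into gpuDynInst->scalar_data before
    // initMemWrite<1>; the dwordx2 and dwordx4 variants below stage 8 and
    // 4 * 4 == 16 bytes and call initMemWrite<2> and initMemWrite<4>.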
    Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX2

    Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX2

    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    sizeof(ScalarRegU64));
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_STORE_DWORDX4

    Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_STORE_DWORDX4

    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
        ScalarRegU32 offset(0);
        ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);

        addr.read();

        if (instData.IMM) {
            offset = extData.OFFSET;
        } else {
            ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
            off_sgpr.read();
            offset = off_sgpr.rawData();
        }

        calcAddr(gpuDynInst, addr, offset);

        gpuDynInst->computeUnit()->scalarMemoryPipe.
            getGMReqFIFO().push(gpuDynInst);

        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    } // execute

    void
    Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA);
        sdata.read();
        std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
                    4 * sizeof(ScalarRegU32));
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORD

    Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORD

    // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX2

    Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2

    // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_SMEM__S_BUFFER_STORE_DWORDX4

    Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4

    // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
    // the offset input.
    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv")
    {
    } // Inst_SMEM__S_DCACHE_INV

    Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
    {
    } // ~Inst_SMEM__S_DCACHE_INV

    // Invalidate the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb")
    {
    } // Inst_SMEM__S_DCACHE_WB

    Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
    {
    } // ~Inst_SMEM__S_DCACHE_WB

    // Write back dirty data in the scalar data cache.
    void
    Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_inv_vol")
    {
    } // Inst_SMEM__S_DCACHE_INV_VOL

    Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_INV_VOL

    // Invalidate the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_dcache_wb_vol")
    {
    } // Inst_SMEM__S_DCACHE_WB_VOL

    Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
    {
    } // ~Inst_SMEM__S_DCACHE_WB_VOL

    // Write back dirty data in the scalar data cache volatile lines.
    void
    Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memtime")
    {
    } // Inst_SMEM__S_MEMTIME

    Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
    {
    } // ~Inst_SMEM__S_MEMTIME

    // Return current 64-bit timestamp.
    void
    Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_memrealtime")
    {
    } // Inst_SMEM__S_MEMREALTIME

    Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
    {
    } // ~Inst_SMEM__S_MEMREALTIME

    // Return current 64-bit RTC.
    void
    Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe")
    {
    } // Inst_SMEM__S_ATC_PROBE

    Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
    {
    } // ~Inst_SMEM__S_ATC_PROBE

    void
    Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
          InFmt_SMEM *iFmt)
        : Inst_SMEM(iFmt, "s_atc_probe_buffer")
    {
    } // Inst_SMEM__S_ATC_PROBE_BUFFER

    Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
    {
    } // ~Inst_SMEM__S_ATC_PROBE_BUFFER

    void
    Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_cndmask_b32")
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_CNDMASK_B32

    Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
    {
    } // ~Inst_VOP2__V_CNDMASK_B32

    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    void
    Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
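
    // Worked example (illustrative only): with vcc.rawData() == 0x5, bits 0
    // and 2 are set, so active lanes 0 and 2 take src1 while every other
    // active lane takes src0.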
    Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_ADD_F32

    Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
    {
    } // ~Inst_VOP2__V_ADD_F32

    // D.f = S0.f + S1.f.
    void
    Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_dpp[lane] + src1[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
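
    // Rough sketch of the DPP path above: processDPP() permutes src0 across
    // lanes according to DPP_CTRL (e.g., a row shift or lane rotate) before
    // the ordinary per-lane add runs, so DPP changes only operand routing,
    // never the arithmetic itself.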
    Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUB_F32

    Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
    {
    } // ~Inst_VOP2__V_SUB_F32

    // D.f = S0.f - S1.f.
    void
    Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_SUBREV_F32

    Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
    {
    } // ~Inst_VOP2__V_SUBREV_F32

    // D.f = S1.f - S0.f.
    void
    Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_LEGACY_F32

    Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP2__V_MUL_LEGACY_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MUL_F32

    Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
    {
    } // ~Inst_VOP2__V_MUL_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
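
    // The cascade above pins the IEEE-754 corner cases down explicitly:
    // e.g., (+/-)0 * (+/-)inf produces NaN, a zero or subnormal times a
    // finite value keeps the product's sign on +/-0.0, and inf times a
    // nonzero finite value yields a signed infinity; only the final else
    // performs an ordinary multiply.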
    Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_I32_I24

    Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0].
    void
    Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
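
    // Worked example (illustrative only): sext<24>(bits(0x00FFFFFF, 23, 0))
    // sign-extends the 24-bit value to -1, so 0x00FFFFFF * 2 yields -2 here
    // rather than the 0x01FFFFFE an unsigned 24-bit multiply would give.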
    Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_I32_I24

    Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP2__V_MUL_HI_I32_I24

    // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
    void
    Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_U32_U24

    Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0].
    void
    Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0_sdwa[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = bits(src0[lane], 23, 0) *
                                 bits(src1[lane], 23, 0);
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_HI_U32_U24

    Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP2__V_MUL_HI_U32_U24

    // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
    void
    Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
                VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);

                vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
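
    // Worked example (illustrative only): with both sources equal to
    // 0x00FFFFFF, the widened 64-bit product is 0xFFFFFE000001, and the
    // returned high half is 0xFFFFFE000001 >> 32 == 0xFFFF.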
    Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MIN_F32

    Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
    {
    } // ~Inst_VOP2__V_MIN_F32

    // D.f = (S0.f < S1.f ? S0.f : S1.f).
    void
    Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP2__V_MAX_F32

    Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
    {
    } // ~Inst_VOP2__V_MAX_F32

    // D.f = (S0.f >= S1.f ? S0.f : S1.f).
    void
    Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I32

    Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
    {
    } // ~Inst_VOP2__V_MIN_I32

    // D.i = min(S0.i, S1.i).
    void
    Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I32

    Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
    {
    } // ~Inst_VOP2__V_MAX_I32

    // D.i = max(S0.i, S1.i).
    void
    Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U32

    Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
    {
    } // ~Inst_VOP2__V_MIN_U32

    // D.u = min(S0.u, S1.u).
    void
    Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U32

    Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
    {
    } // ~Inst_VOP2__V_MAX_U32

    // D.u = max(S0.u, S1.u).
    void
    Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B32

    Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
    {
    } // ~Inst_VOP2__V_LSHRREV_B32

    // D.u = S1.u >> S0.u[4:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
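
    // The "rev" in the mnemonic means the operand roles are swapped relative
    // to v_lshr_b32: SRC0 supplies the shift amount and SRC1 the value.
    // Worked example: src0 == 33 is masked to bits [4:0] == 1, so
    // 0x80000000 >> 1 == 0x40000000.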
    Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I32

    Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
    {
    } // ~Inst_VOP2__V_ASHRREV_I32

    // D.i = signext(S1.i) >> S0.i[4:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshlrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHLREV_B32

    Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
    {
    } // ~Inst_VOP2__V_LSHLREV_B32

    // D.u = S1.u << S0.u[4:0].
    void
    Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and vdst during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
                    "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
                    "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
                    "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
                }
            }
        }

        vdst.write();
    } // execute
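
    // Rough sketch of the SDWA path above: processSDWA_src() applies the
    // sub-dword source selects (byte/word lane, sign-extension, abs/neg) to
    // src0_sdwa and src1 before the shift, and processSDWA_dst() then merges
    // the result into vdst according to DST_SEL/DST_UNUSED; the orig* copies
    // preserve the unmodified register values the selects operate on.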
    Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_and_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_AND_B32

    Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
    {
    } // ~Inst_VOP2__V_AND_B32

    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] & src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_or_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_OR_B32

    Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
    {
    } // ~Inst_VOP2__V_OR_B32

    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] | src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] | src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_xor_b32")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_XOR_B32

    Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
    {
    } // ~Inst_VOP2__V_XOR_B32

    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F32

    Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
    {
    } // ~Inst_VOP2__V_MAC_F32

    // D.f = S0.f * S1.f + D.f.
    void
    Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        VecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();
        vdst.read();

        if (isDPPInst()) {
            VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src0_dpp.read();

            DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
                                          vdst[lane]);
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F32

    Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
    {
    } // ~Inst_VOP2__V_MADMK_F32

    // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
    // This opcode cannot use the input/output modifiers.
    void
    Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], k, src1[lane]);
            }
        }

        vdst.write();
    } // execute
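
    // Worked example (illustrative only): with K == 2.5f taken from the
    // trailing literal dword, src0 == 2.0f and src1 == 1.0f give
    // std::fma(2.0f, 2.5f, 1.0f) == 6.0f, computed with a single rounding
    // as a fused multiply-add.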
    Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f32")
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F32

    Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
    {
    } // ~Inst_VOP2__V_MADAK_F32

    // D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
    // This opcode cannot use input/output modifiers.
    void
    Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
        VecElemF32 k = extData.imm_f32;

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], k);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_ADD_U32

    Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
    {
    } // ~Inst_VOP2__V_ADD_U32

    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        VecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        if (isSDWAInst()) {
            VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
            // use copies of original src0, src1, and dest during selecting
            VecOperandU32 origSrc0_sdwa(gpuDynInst,
                                        extData.iFmt_VOP_SDWA.SRC0);
            VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
            VecOperandU32 origVdst(gpuDynInst, instData.VDST);

            src0_sdwa.read();
            origSrc0_sdwa.read();
            origSrc1.read();

            DPRINTF(GCN3, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
                    "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
                    "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
                    "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
                    extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
                    extData.iFmt_VOP_SDWA.DST_UNUSED,
                    extData.iFmt_VOP_SDWA.CLAMP,
                    extData.iFmt_VOP_SDWA.SRC0_SEL,
                    extData.iFmt_VOP_SDWA.SRC0_SEXT,
                    extData.iFmt_VOP_SDWA.SRC0_NEG,
                    extData.iFmt_VOP_SDWA.SRC0_ABS,
                    extData.iFmt_VOP_SDWA.SRC1_SEL,
                    extData.iFmt_VOP_SDWA.SRC1_SEXT,
                    extData.iFmt_VOP_SDWA.SRC1_NEG,
                    extData.iFmt_VOP_SDWA.SRC1_ABS);

            processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
                            src1, origSrc1);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0_sdwa[lane] + src1[lane];
                    origVdst[lane] = vdst[lane]; // keep copy consistent
                    vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }

            processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src0[lane] + src1[lane];
                    vcc.setBit(lane, ((VecElemU64)src0[lane]
                        + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
                }
            }
        }

        vdst.write();
        vcc.write();
    } // execute
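
    // Worked example (illustrative only): src0 == 0xFFFFFFFF and src1 == 1
    // wrap vdst to 0, while the widened 64-bit sum 0x100000000ULL trips the
    // >= test, so that lane's VCC bit records the carry-out.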
    Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUB_U32

    Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
    {
    } // ~Inst_VOP2__V_SUB_U32

    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP2__V_SUBREV_U32

    Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
    {
    } // ~Inst_VOP2__V_SUBREV_U32

    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_addc_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_ADDC_U32

    Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32()
    {
    } // ~Inst_VOP2__V_ADDC_U32

    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                vcc.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                    + (VecElemU64)bits(vcc.rawData(), lane, lane))
                    >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
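
    // Usage sketch (illustrative only): a 64-bit add is built from this pair
    // of instructions, e.g.
    //     v_add_u32  v0, vcc, v0, v2       // low dword, carry-out in VCC
    //     v_addc_u32 v1, vcc, v1, v3, vcc  // high dword consumes the carry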
    Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subb_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBB_U32

    Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32()
    {
    } // ~Inst_VOP2__V_SUBB_U32

    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subbrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP2__V_SUBBREV_U32

    Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32()
    {
    } // ~Inst_VOP2__V_SUBBREV_U32

    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();
        vcc.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
                vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
                    > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_ADD_F16

    Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
    {
    } // ~Inst_VOP2__V_ADD_F16

    // D.f16 = S0.f16 + S1.f16.
    void
    Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUB_F16

    Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
    {
    } // ~Inst_VOP2__V_SUB_F16

    // D.f16 = S0.f16 - S1.f16.
    void
    Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_SUBREV_F16

    Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
    {
    } // ~Inst_VOP2__V_SUBREV_F16

    // D.f16 = S1.f16 - S0.f16.
    void
    Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MUL_F16

    Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
    {
    } // ~Inst_VOP2__V_MUL_F16

    // D.f16 = S0.f16 * S1.f16.
    void
    Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mac_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP2__V_MAC_F16

    Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
    {
    } // ~Inst_VOP2__V_MAC_F16

    // D.f16 = S0.f16 * S1.f16 + D.f16.
    void
    Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madmk_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADMK_F16

    Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
    {
    } // ~Inst_VOP2__V_MADMK_F16

    // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers.
    void
    Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_madak_f16")
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP2__V_MADAK_F16

    Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
    {
    } // ~Inst_VOP2__V_MADAK_F16

    // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
    // in the following literal DWORD.
    // This opcode cannot use the VOP3 encoding and cannot use input/output
    // modifiers.
    void
    Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_add_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ADD_U16

    Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
    {
    } // ~Inst_VOP2__V_ADD_U16

    // D.u16 = S0.u16 + S1.u16.
    void
    Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_sub_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUB_U16

    Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
    {
    } // ~Inst_VOP2__V_SUB_U16

    // D.u16 = S0.u16 - S1.u16.
    void
    Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_subrev_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_SUBREV_U16

    Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
    {
    } // ~Inst_VOP2__V_SUBREV_U16

    // D.u16 = S1.u16 - S0.u16.
    void
    Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_mul_lo_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MUL_LO_U16

    Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
    {
    } // ~Inst_VOP2__V_MUL_LO_U16

    // D.u16 = S0.u16 * S1.u16.
    void
    Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
7025 Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2
*iFmt
)
7026 : Inst_VOP2(iFmt
, "v_lshlrev_b16")
7029 } // Inst_VOP2__V_LSHLREV_B16
7031 Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
7033 } // ~Inst_VOP2__V_LSHLREV_B16
7035 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
7037 Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst
)
7039 Wavefront
*wf
= gpuDynInst
->wavefront();
7040 ConstVecOperandU16
src0(gpuDynInst
, instData
.SRC0
);
7041 ConstVecOperandU16
src1(gpuDynInst
, instData
.VSRC1
);
7042 VecOperandU16
vdst(gpuDynInst
, instData
.VDST
);
7047 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
7048 if (wf
->execMask(lane
)) {
7049 vdst
[lane
] = src1
[lane
] << bits(src0
[lane
], 3, 0);
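    // NOTE: the "rev" suffix means the operand roles are reversed relative
    // to the non-rev shift ops: the shift count comes from SRC0 and the
    // shifted value from VSRC1. For example (hypothetical operands),
    //     v_lshlrev_b16 v0, 4, v1
    // computes v0.u16 = v1.u16 << 4, so the inline constant supplies the
    // shift amount. This pairing exists because only SRC0 can encode an
    // inline constant in the VOP2 format.
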
    Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_lshrrev_b16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_LSHRREV_B16

    Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
    {
    } // ~Inst_VOP2__V_LSHRREV_B16

    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ashrrev_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_ASHRREV_I16

    Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
    {
    } // ~Inst_VOP2__V_ASHRREV_I16

    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MAX_F16

    Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
    {
    } // ~Inst_VOP2__V_MAX_F16

    // D.f16 = max(S0.f16, S1.f16).
    void
    Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_MIN_F16

    Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
    {
    } // ~Inst_VOP2__V_MIN_F16

    // D.f16 = min(S0.f16, S1.f16).
    void
    Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_U16

    Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
    {
    } // ~Inst_VOP2__V_MAX_U16

    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_max_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MAX_I16

    Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
    {
    } // ~Inst_VOP2__V_MAX_I16

    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_u16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_U16

    Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
    {
    } // ~Inst_VOP2__V_MIN_U16

    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_min_i16")
    {
        setFlag(ALU);
    } // Inst_VOP2__V_MIN_I16

    Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
    {
    } // ~Inst_VOP2__V_MIN_I16

    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
        : Inst_VOP2(iFmt, "v_ldexp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP2__V_LDEXP_F16

    Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
    {
    } // ~Inst_VOP2__V_LDEXP_F16

    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_nop")
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP1__V_NOP

    Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP()
    {
    } // ~Inst_VOP1__V_NOP

    // Do nothing.
    void
    Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute

    Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_B32

    Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32()
    {
    } // ~Inst_VOP1__V_MOV_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (isDPPInst()) {
            VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
            src_dpp.read();

            DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
                    "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
                    "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
                    "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
                    extData.iFmt_VOP_DPP.DPP_CTRL,
                    extData.iFmt_VOP_DPP.SRC0_ABS,
                    extData.iFmt_VOP_DPP.SRC0_NEG,
                    extData.iFmt_VOP_DPP.SRC1_ABS,
                    extData.iFmt_VOP_DPP.SRC1_NEG,
                    extData.iFmt_VOP_DPP.BOUND_CTRL,
                    extData.iFmt_VOP_DPP.BANK_MASK,
                    extData.iFmt_VOP_DPP.ROW_MASK);

            // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
            // to negate it or take the absolute value of it
            assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
            assert(!extData.iFmt_VOP_DPP.SRC1_NEG);

            processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src_dpp[lane];
                }
            }
        } else {
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (wf->execMask(lane)) {
                    vdst[lane] = src[lane];
                }
            }
        }

        vdst.write();
    } // execute

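    // NOTE: DPP (Data Parallel Primitives) makes a VOP instruction read its
    // vector source through a cross-lane pattern: DPP_CTRL selects row
    // shifts/rotations, mirrors, or broadcasts, while BOUND_CTRL chooses
    // what lanes with out-of-range neighbors read and ROW_MASK/BANK_MASK
    // gate groups of lanes. processDPP() (from inst_util.hh) permutes
    // src_dpp in place, which is why the copy loop above can stay a plain
    // per-lane move. As a rough sketch, a row-shift-left-by-1 control makes
    // lane i see lane i+1's value within each 16-lane row.
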
    Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_readfirstlane_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_READFIRSTLANE_B32

    Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32()
    {
    } // ~Inst_VOP1__V_READFIRSTLANE_B32

    // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
    // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
    // (Lane# = 0 if exec is zero). Ignores exec mask for the access.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarRegI32 src_lane(0);
        ScalarRegU64 exec_mask = wf->execMask().to_ullong();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (exec_mask) {
            src_lane = findLsbSet(exec_mask);
        }

        sdst = src[src_lane];

        sdst.write();
    } // execute

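    // NOTE: findLsbSet() returns the index of the least-significant set
    // bit, so src_lane is the lowest-numbered active lane. For example,
    // exec_mask == 0xf0 selects lane 4, and exec_mask == 0 leaves src_lane
    // at 0, which matches the "Lane# = 0 if exec is zero" rule above.
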
    Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_I32_F64

    Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
    {
    } // ~Inst_VOP1__V_CVT_I32_F64

    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

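    // NOTE: the guard above relies on frexp() returning an exponent exp
    // with 2^(exp-1) <= |src| < 2^exp, so exp > 30 flags any magnitude of
    // 2^30 or larger for saturation (a slightly conservative cut-off,
    // since INT_MAX is 2^31 - 1). Worked examples: 3.9 -> 3 (the cast
    // truncates toward zero), -1.0e300 -> INT_MIN, NaN -> 0.
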
    Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_i32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_I32

    Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
    {
    } // ~Inst_VOP1__V_CVT_F64_I32

    // D.d = (double)S0.i.
    void
    Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_i32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_I32

    Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
    {
    } // ~Inst_VOP1__V_CVT_F32_I32

    // D.f = (float)S0.i.
    void
    Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_u32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_U32

    Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
    {
    } // ~Inst_VOP1__V_CVT_F32_U32

    // D.f = (float)S0.u.
    void
    Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_U32_F32

    Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
    {
    } // ~Inst_VOP1__V_CVT_U32_F32

    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_I32_F32

    Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_I32_F32

    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_mov_fed_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_MOV_FED_B32

    Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
    {
    } // ~Inst_VOP1__V_MOV_FED_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_f32")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_F32

    Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
    {
    } // ~Inst_VOP1__V_CVT_F16_F32

    // D.f16 = flt32_to_flt16(S0.f).
    void
    Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f16")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_F16

    Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
    {
    } // ~Inst_VOP1__V_CVT_F32_F16

    // D.f = flt16_to_flt32(S0.f16).
    void
    Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_RPI_I32_F32

    Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_RPI_I32_F32

    // D.i = (int)floor(S0.f + 0.5).
    void
    Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_FLR_I32_F32

    Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP1__V_CVT_FLR_I32_F32

    // D.i = (int)floor(S0.f).
    void
    Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_OFF_F32_I4

    Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP1__V_CVT_OFF_F32_I4

    // 4-bit signed int to 32-bit float.
    void
    Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F32_F64

    Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
    {
    } // ~Inst_VOP1__V_CVT_F32_F64

    // D.f = (float)S0.d.
    void
    Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_f32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_F32

    Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32()
    {
    } // ~Inst_VOP1__V_CVT_F64_F32

    // D.d = (double)S0.f.
    void
    Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE0

    Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE0

    // D.f = (float)(S0.u[7:0]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
            }
        }

        vdst.write();
    } // execute

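    // NOTE: UBYTE0 above and the three variants below differ only in which
    // byte bits() extracts from the packed word. For src = 0x40302010:
    // ubyte0 gives 16.0f, ubyte1 gives 32.0f, ubyte2 gives 48.0f, and
    // ubyte3 gives 64.0f. This is the usual way packed 8-bit channels are
    // unpacked to float.
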
    Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE1

    Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE1

    // D.f = (float)(S0.u[15:8]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE2

    Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE2

    // D.f = (float)(S0.u[23:16]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CVT_F32_UBYTE3

    Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3()
    {
    } // ~Inst_VOP1__V_CVT_F32_UBYTE3

    // D.f = (float)(S0.u[31:24]).
    void
    Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_U32_F64

    Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64()
    {
    } // ~Inst_VOP1__V_CVT_U32_F64

    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f64_u32")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CVT_F64_U32

    Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
    {
    } // ~Inst_VOP1__V_CVT_F64_U32

    // D.d = (double)S0.u.
    void
    Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_TRUNC_F64

    Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
    {
    } // ~Inst_VOP1__V_TRUNC_F64

    // D.d = trunc(S0.d), return integer part of S0.d.
    void
    Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_CEIL_F64

    Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
    {
    } // ~Inst_VOP1__V_CEIL_F64

    // D.d = ceil(S0.d);
    void
    Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RNDNE_F64

    Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
    {
    } // ~Inst_VOP1__V_RNDNE_F64

    // D.d = round_nearest_even(S0.d).
    void
    Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

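    // NOTE: assuming roundNearestEven() (inst_util.hh) implements the usual
    // IEEE round-half-to-even, ties go to the even neighbor:
    // 0.5 -> 0.0, 1.5 -> 2.0, 2.5 -> 2.0; non-ties round normally, e.g.
    // 2.4 -> 2.0 and 2.6 -> 3.0.
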
    Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FLOOR_F64

    Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
    {
    } // ~Inst_VOP1__V_FLOOR_F64

    // D.d = floor(S0.d);
    void
    Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FRACT_F32

    Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
    {
    } // ~Inst_VOP1__V_FRACT_F32

    // D.f = modf(S0.f).
    void
    Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

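    // NOTE: std::modf() returns a fractional part that carries the sign of
    // the input, so this fract gives 0.75f for 1.75f but -0.25f for -1.25f.
    // A literal "S0.f - floor(S0.f)" fract would give 0.75f for -1.25f
    // instead; the implementation follows the modf definition in the
    // comment above.
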
    Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_TRUNC_F32

    Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32()
    {
    } // ~Inst_VOP1__V_TRUNC_F32

    // D.f = trunc(S0.f), return integer part of S0.f.
    void
    Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_CEIL_F32

    Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32()
    {
    } // ~Inst_VOP1__V_CEIL_F32

    // D.f = ceil(S0.f);
    void
    Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RNDNE_F32

    Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
    {
    } // ~Inst_VOP1__V_RNDNE_F32

    // D.f = round_nearest_even(S0.f).
    void
    Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FLOOR_F32

    Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
    {
    } // ~Inst_VOP1__V_FLOOR_F32

    // D.f = floor(S0.f);
    void
    Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_F32

    Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
    {
    } // ~Inst_VOP1__V_EXP_F32

    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_F32

    Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
    {
    } // ~Inst_VOP1__V_LOG_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_F32

    Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
    {
    } // ~Inst_VOP1__V_RCP_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RCP_IFLAG_F32

    Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP1__V_RCP_IFLAG_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_RSQ_F32

    Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32()
    {
    } // ~Inst_VOP1__V_RSQ_F32

    // D.f = 1.0 / sqrt(S0.f).
    void
    Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RCP_F64

    Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64()
    {
    } // ~Inst_VOP1__V_RCP_F64

    // D.d = 1.0 / S0.d.
    void
    Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute

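    // NOTE: the special cases above mirror IEEE division for 1.0 / x:
    //   +-0.0 -> +INFINITY (the sign of the zero is not propagated here),
    //   NaN   -> NaN,
    //   +-INF -> +-0.0 (signed zero preserved via signbit()),
    // and every other input takes the ordinary 1.0 / src path.
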
    Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_RSQ_F64

    Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64()
    {
    } // ~Inst_VOP1__V_RSQ_F64

    // D.d = 1.0 / sqrt(S0.d).
    void
    Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])
                           && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SQRT_F32

    Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
    {
    } // ~Inst_VOP1__V_SQRT_F32

    // D.f = sqrt(S0.f).
    void
    Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_SQRT_F64

    Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
    {
    } // ~Inst_VOP1__V_SQRT_F64

    // D.d = sqrt(S0.d).
    void
    Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sin_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_SIN_F32

    Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
    {
    } // ~Inst_VOP1__V_SIN_F32

    // D.f = sin(S0.f * 2 * PI).
    void
    Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (src[lane] < -256.0 || src[lane] > 256.0) {
                    vdst[lane] = 0.0;
                } else {
                    vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
                }
            }
        }

        vdst.write();
    } // execute

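    // NOTE: the operand is in revolutions rather than radians: the result
    // is sin(S0.f * 2 * PI), so src = 0.25f yields sin(pi/2) = 1.0f.
    // Inputs outside [-256.0, 256.0] bypass the computation (those lanes
    // produce 0.0 here), reflecting the limited argument range of the
    // hardware instruction.
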
    Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cos_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_COS_F32

    Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
    {
    } // ~Inst_VOP1__V_COS_F32

    // D.f = cos(S0.f * 2 * PI).
    void
    Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (src[lane] < -256.0 || src[lane] > 256.0) {
                    vdst[lane] = 0.0;
                } else {
                    vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_not_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_NOT_B32

    Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32()
    {
    } // ~Inst_VOP1__V_NOT_B32

    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ~src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_bfrev_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_BFREV_B32

    Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32()
    {
    } // ~Inst_VOP1__V_BFREV_B32

    // D.u[31:0] = S0.u[0:31], bitfield reverse.
    // Input and output modifiers not supported.
    void
    Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = reverseBits(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbh_u32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBH_U32

    Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
    {
    } // ~Inst_VOP1__V_FFBH_U32

    // D.u = position of first 1 in S0.u from MSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOneMsb(src[lane]);
            }
        }

        vdst.write();
    } // execute

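    // NOTE: findFirstOneMsb() counts from the most-significant end, i.e.
    // the result is the number of leading zeros before the first 1 bit:
    // 0x80000000 -> 0, 0x00010000 -> 15, 0x1 -> 31, and 0x0 -> 0xffffffff
    // per the comment above.
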
    Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbl_b32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBL_B32

    Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
    {
    } // ~Inst_VOP1__V_FFBL_B32

    // D.u = position of first 1 in S0.u from LSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOne(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ffbh_i32")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_FFBH_I32

    Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
    {
    } // ~Inst_VOP1__V_FFBH_I32

    // D.u = position of first bit different from sign bit in S0.i from MSB;
    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
    void
    Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = firstOppositeSignBit(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FREXP_EXP_I32_F64

    Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I32_F64

    void
    Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FREXP_MANT_F64

    Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F64

    void
    Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP1__V_FRACT_F64

    Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
    {
    } // ~Inst_VOP1__V_FRACT_F64

    void
    Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF64 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FREXP_EXP_I32_F32

    Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I32_F32

    // frexp(S0.f, Exponent(S0.f))
    // if (S0.f == INF || S0.f == NAN) then D.i = 0;
    // else D.i = Exponent(S0.f);
    void
    Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_FREXP_MANT_F32

    Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F32

    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
    // else D.f = frexp(S0.f, Exponent(S0.f)).
    void
    Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_clrexcp")
    {
        setFlag(ALU);
    } // Inst_VOP1__V_CLREXCP

    Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
    {
    } // ~Inst_VOP1__V_CLREXCP

    void
    Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_u16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_U16

    Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
    {
    } // ~Inst_VOP1__V_CVT_F16_U16

    // D.f16 = uint16_to_flt16(S.u16).
    void
    Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_f16_i16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_F16_I16

    Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
    {
    } // ~Inst_VOP1__V_CVT_F16_I16

    // D.f16 = int16_to_flt16(S.i16).
    void
    Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_u16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_U16_F16

    Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
    {
    } // ~Inst_VOP1__V_CVT_U16_F16

    // D.u16 = flt16_to_uint16(S.f16).
    void
    Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cvt_i16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CVT_I16_F16

    Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
    {
    } // ~Inst_VOP1__V_CVT_I16_F16

    // D.i16 = flt16_to_int16(S.f16).
    void
    Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rcp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RCP_F16

    Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
    {
    } // ~Inst_VOP1__V_RCP_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / S0.f16;
    void
    Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sqrt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_SQRT_F16

    Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16()
    {
    } // ~Inst_VOP1__V_SQRT_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = sqrt(S0.f16);
    void
    Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rsq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RSQ_F16

    Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16()
    {
    } // ~Inst_VOP1__V_RSQ_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / sqrt(S0.f16);
    void
    Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_LOG_F16

    Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16()
    {
    } // ~Inst_VOP1__V_LOG_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 0.0f;
    // else
    //     D.f16 = log2(S0.f16);
    void
    Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_EXP_F16

    Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16()
    {
    } // ~Inst_VOP1__V_EXP_F16

    // if (S0.f16 == 0.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = pow(2.0, S0.f16).
    void
    Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_mant_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FREXP_MANT_F16

    Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16()
    {
    } // ~Inst_VOP1__V_FREXP_MANT_F16

    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.f16 = S0.f16;
    // else
    //     D.f16 = mantissa(S0.f16).
    void
    Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(
          InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FREXP_EXP_I16_F16

    Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16()
    {
    } // ~Inst_VOP1__V_FREXP_EXP_I16_F16

    // frexp(S0.f16, Exponent(S0.f16))
    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.i16 = 0;
    // else
    //     D.i16 = Exponent(S0.f16);
    void
    Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_floor_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FLOOR_F16

    Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16()
    {
    } // ~Inst_VOP1__V_FLOOR_F16

    // D.f16 = floor(S0.f16);
    void
    Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_ceil_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_CEIL_F16

    Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16()
    {
    } // ~Inst_VOP1__V_CEIL_F16

    // D.f16 = ceil(S0.f16);
    void
    Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_trunc_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_TRUNC_F16

    Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16()
    {
    } // ~Inst_VOP1__V_TRUNC_F16

    // D.f16 = trunc(S0.f16).
    void
    Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_rndne_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_RNDNE_F16

    Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16()
    {
    } // ~Inst_VOP1__V_RNDNE_F16

    // D.f16 = roundNearestEven(S0.f16);
    void
    Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_fract_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_FRACT_F16

    Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16()
    {
    } // ~Inst_VOP1__V_FRACT_F16

    // D.f16 = S0.f16 + -floor(S0.f16).
    void
    Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_sin_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_SIN_F16

    Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16()
    {
    } // ~Inst_VOP1__V_SIN_F16

    // D.f16 = sin(S0.f16 * 2 * PI).
    void
    Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_cos_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP1__V_COS_F16

    Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16()
    {
    } // ~Inst_VOP1__V_COS_F16

    // D.f16 = cos(S0.f16 * 2 * PI).
    void
    Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_exp_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_EXP_LEGACY_F32

    Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32()
    {
    } // ~Inst_VOP1__V_EXP_LEGACY_F32

    // D.f = pow(2.0, S0.f)
    void
    Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
        : Inst_VOP1(iFmt, "v_log_legacy_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP1__V_LOG_LEGACY_F32

    Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
    {
    } // ~Inst_VOP1__V_LOG_LEGACY_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_CLASS_F32

    Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F32

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        // The condition-code result is written, so the operand must not be
        // a Const operand (setBit()/write() are used below).
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

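    // NOTE: S1 is a ten-bit class mask, and VCC bit <lane> becomes 1 iff
    // src0 falls in any selected class. For example, mask 0x003 (bits 0-1)
    // is a plain NaN test, 0x204 (bits 2 and 9) tests for infinity of
    // either sign, and 0x060 (bits 5-6) tests for a zero of either sign.
    // Signaling and quiet NaNs are not distinguished here: both mask bits
    // feed the same std::isnan() check.
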
    Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_class_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_CLASS_F32

    Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32()
    {
    } // ~Inst_VOPC__V_CMPX_CLASS_F32

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed
    // on S0.f. The function reports true if the floating point value is any
    // of the numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        // Written below via setBit()/write(), so not a Const operand.
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMP_CLASS_F64

    Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F64

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        // Written below via setBit()/write(), so not a Const operand.
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        vcc.setBit(lane, 1);
                        continue;
                    }
                }
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

9746 Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC
*iFmt
)
9747 : Inst_VOPC(iFmt
, "v_cmpx_class_f64")
9751 } // Inst_VOPC__V_CMPX_CLASS_F64
9753 Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64()
9755 } // ~Inst_VOPC__V_CMPX_CLASS_F64
9757 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
9758 // S0.d The function reports true if the floating point value is any of
9759 // the numeric types selected in S1.u according to the following list:
9760 // S1.u[0] -- value is a signaling NaN.
9761 // S1.u[1] -- value is a quiet NaN.
9762 // S1.u[2] -- value is negative infinity.
9763 // S1.u[3] -- value is a negative normal value.
9764 // S1.u[4] -- value is a negative denormal value.
9765 // S1.u[5] -- value is negative zero.
9766 // S1.u[6] -- value is positive zero.
9767 // S1.u[7] -- value is a positive denormal value.
9768 // S1.u[8] -- value is a positive normal value.
9769 // S1.u[9] -- value is positive infinity.
9771 Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst
)
9773 Wavefront
*wf
= gpuDynInst
->wavefront();
9774 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
9775 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
9776 ConstScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
9781 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
9782 if (wf
->execMask(lane
)) {
9783 if (bits(src1
[lane
], 0) || bits(src1
[lane
], 1)) {
9785 if (std::isnan(src0
[lane
])) {
9786 vcc
.setBit(lane
, 1);
9790 if (bits(src1
[lane
], 2)) {
9792 if (std::isinf(src0
[lane
]) && std::signbit(src0
[lane
])) {
9793 vcc
.setBit(lane
, 1);
9797 if (bits(src1
[lane
], 3)) {
9799 if (std::isnormal(src0
[lane
])
9800 && std::signbit(src0
[lane
])) {
9801 vcc
.setBit(lane
, 1);
9805 if (bits(src1
[lane
], 4)) {
9807 if (std::fpclassify(src0
[lane
]) == FP_SUBNORMAL
9808 && std::signbit(src0
[lane
])) {
9809 vcc
.setBit(lane
, 1);
9813 if (bits(src1
[lane
], 5)) {
9815 if (std::fpclassify(src0
[lane
]) == FP_ZERO
9816 && std::signbit(src0
[lane
])) {
9817 vcc
.setBit(lane
, 1);
9821 if (bits(src1
[lane
], 6)) {
9823 if (std::fpclassify(src0
[lane
]) == FP_ZERO
9824 && !std::signbit(src0
[lane
])) {
9825 vcc
.setBit(lane
, 1);
9829 if (bits(src1
[lane
], 7)) {
9831 if (std::fpclassify(src0
[lane
]) == FP_SUBNORMAL
9832 && !std::signbit(src0
[lane
])) {
9833 vcc
.setBit(lane
, 1);
9837 if (bits(src1
[lane
], 8)) {
9839 if (std::isnormal(src0
[lane
])
9840 && !std::signbit(src0
[lane
])) {
9841 vcc
.setBit(lane
, 1);
9845 if (bits(src1
[lane
], 9)) {
9847 if (std::isinf(src0
[lane
])
9848 && !std::signbit(src0
[lane
])) {
9849 vcc
.setBit(lane
, 1);
9857 wf
->execMask() = vcc
.rawData();
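    // The F64 class compares reuse the same ten class bits; only the operand
    // type changes. std::fpclassify, std::isnormal, std::isinf, and
    // std::signbit are overloaded on double, so the per-lane checks read
    // identically to the F32 versions above.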
    Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_class_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_CLASS_F16

    Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16()
    {
    } // ~Inst_VOPC__V_CMP_CLASS_F16

    // VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f16. The function reports true if the floating point value is any
    // of the numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_class_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_CLASS_F16

    Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOPC__V_CMPX_CLASS_F16

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed
    // on S0.f16. The function reports true if the floating point value is
    // any of the numeric types selected in S1.u according to the following
    // list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_F_F16

    Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16()
    {
    } // ~Inst_VOPC__V_CMP_F_F16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LT_F16

    Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16()
    {
    } // ~Inst_VOPC__V_CMP_LT_F16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_EQ_F16

    Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_F16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LE_F16

    Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16()
    {
    } // ~Inst_VOPC__V_CMP_LE_F16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_GT_F16

    Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16()
    {
    } // ~Inst_VOPC__V_CMP_GT_F16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_LG_F16

    Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16()
    {
    } // ~Inst_VOPC__V_CMP_LG_F16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_GE_F16

    Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16()
    {
    } // ~Inst_VOPC__V_CMP_GE_F16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_o_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_O_F16

    Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16()
    {
    } // ~Inst_VOPC__V_CMP_O_F16

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_u_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_U_F16

    Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16()
    {
    } // ~Inst_VOPC__V_CMP_U_F16

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NGE_F16

    Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16()
    {
    } // ~Inst_VOPC__V_CMP_NGE_F16

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLG_F16

    Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLG_F16

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ngt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NGT_F16

    Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16()
    {
    } // ~Inst_VOPC__V_CMP_NGT_F16

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nle_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLE_F16

    Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLE_F16

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_neq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NEQ_F16

    Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16()
    {
    } // ~Inst_VOPC__V_CMP_NEQ_F16

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_NLT_F16

    Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16()
    {
    } // ~Inst_VOPC__V_CMP_NLT_F16

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_tru_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMP_TRU_F16

    Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16()
    {
    } // ~Inst_VOPC__V_CMP_TRU_F16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_F_F16

    Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16()
    {
    } // ~Inst_VOPC__V_CMPX_F_F16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LT_F16

    Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_EQ_F16

    Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LE_F16

    Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_GT_F16

    Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_LG_F16

    Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_GE_F16

    Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_O_F16

    Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16()
    {
    } // ~Inst_VOPC__V_CMPX_O_F16

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_U_F16

    Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16()
    {
    } // ~Inst_VOPC__V_CMPX_U_F16

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NGE_F16

    Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F16

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLG_F16

    Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F16

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NGT_F16

    Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F16

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLE_F16

    Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F16

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NEQ_F16

    Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F16

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_NLT_F16

    Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F16

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f16")
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOPC__V_CMPX_TRU_F16

    Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
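    // All of the F16 compares above raise panicUnimplemented(): this model
    // does not implement half-precision VOPC compares. The F32 and F64
    // families below are implemented.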
    Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_F_F32

    Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32()
    {
    } // ~Inst_VOPC__V_CMP_F_F32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute
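    // v_cmp_f_* always writes 0, so it can serve as a one-instruction way of
    // clearing the VCC bits of the active lanes; v_cmp_tru_* further below
    // is its all-ones counterpart.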
    Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LT_F32

    Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32()
    {
    } // ~Inst_VOPC__V_CMP_LT_F32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_EQ_F32

    Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_F32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LE_F32

    Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32()
    {
    } // ~Inst_VOPC__V_CMP_LE_F32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_GT_F32

    Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32()
    {
    } // ~Inst_VOPC__V_CMP_GT_F32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_LG_F32

    Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32()
    {
    } // ~Inst_VOPC__V_CMP_LG_F32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
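    // Note that LG ("less than or greater than") is built from the ordered
    // operators < and >, so a NaN in either source yields 0 here, whereas
    // the NEQ compare further below uses != and therefore yields 1 on NaN.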
    Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_GE_F32

    Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32()
    {
    } // ~Inst_VOPC__V_CMP_GE_F32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_o_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_O_F32

    Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32()
    {
    } // ~Inst_VOPC__V_CMP_O_F32

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_u_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_U_F32

    Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32()
    {
    } // ~Inst_VOPC__V_CMP_U_F32

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
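    // O ("ordered") and U ("unordered") are pure NaN tests: O is 1 only when
    // neither source is NaN, and U is its exact complement.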
    Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NGE_F32

    Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32()
    {
    } // ~Inst_VOPC__V_CMP_NGE_F32

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
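    // The N* compares are negations of the corresponding ordered compares,
    // which makes them unordered predicates: !(S0 >= S1) is 1 not only when
    // S0 < S1 but also when either source is NaN.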
    Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLG_F32

    Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLG_F32

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ngt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NGT_F32

    Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32()
    {
    } // ~Inst_VOPC__V_CMP_NGT_F32

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nle_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLE_F32

    Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLE_F32

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_neq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NEQ_F32

    Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32()
    {
    } // ~Inst_VOPC__V_CMP_NEQ_F32

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute
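    // NEQ is implemented directly with operator!=, which is already an
    // unordered predicate in C++: it returns true when either operand is
    // NaN, matching the !(S0 == S1) description above.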
    Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_nlt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_NLT_F32

    Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32()
    {
    } // ~Inst_VOPC__V_CMP_NLT_F32

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_tru_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMP_TRU_F32

    Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32()
    {
    } // ~Inst_VOPC__V_CMP_TRU_F32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_F_F32

    Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32()
    {
    } // ~Inst_VOPC__V_CMPX_F_F32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute
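    // Every CMPX variant below follows the same pattern: compute the result
    // into VCC, then overwrite EXEC with it. After v_cmpx_f_f32 no lane
    // remains active until something rewrites the EXEC mask.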
    Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LT_F32

    Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_EQ_F32

    Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LE_F32

    Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_GT_F32

    Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_LG_F32

    Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_GE_F32

    Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_O_F32

    Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32()
    {
    } // ~Inst_VOPC__V_CMPX_O_F32

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_U_F32

    Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32()
    {
    } // ~Inst_VOPC__V_CMPX_U_F32

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NGE_F32

    Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F32

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLG_F32

    Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F32

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NGT_F32

    Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F32

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLE_F32

    Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F32

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NEQ_F32

    Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F32

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_NLT_F32

    Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F32

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOPC__V_CMPX_TRU_F32

    Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
        wf->execMask() = vcc.rawData();
    } // execute
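    // v_cmpx_tru_f32 is the complement idiom: it sets the VCC bit of every
    // active lane to 1 and writes the result back to EXEC.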
11528 Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC
*iFmt
)
11529 : Inst_VOPC(iFmt
, "v_cmp_f_f64")
11533 } // Inst_VOPC__V_CMP_F_F64
11535 Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64()
11537 } // ~Inst_VOPC__V_CMP_F_F64
11539 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
11541 Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst
)
11543 Wavefront
*wf
= gpuDynInst
->wavefront();
11544 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11546 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11547 if (wf
->execMask(lane
)) {
11548 vcc
.setBit(lane
, 0);
11555 Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC
*iFmt
)
11556 : Inst_VOPC(iFmt
, "v_cmp_lt_f64")
11560 } // Inst_VOPC__V_CMP_LT_F64
11562 Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64()
11564 } // ~Inst_VOPC__V_CMP_LT_F64
11566 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
11568 Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst
)
11570 Wavefront
*wf
= gpuDynInst
->wavefront();
11571 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11572 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11573 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11578 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11579 if (wf
->execMask(lane
)) {
11580 vcc
.setBit(lane
, src0
[lane
] < src1
[lane
] ? 1 : 0);
11587 Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC
*iFmt
)
11588 : Inst_VOPC(iFmt
, "v_cmp_eq_f64")
11592 } // Inst_VOPC__V_CMP_EQ_F64
11594 Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64()
11596 } // ~Inst_VOPC__V_CMP_EQ_F64
11598 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
11600 Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst
)
11602 Wavefront
*wf
= gpuDynInst
->wavefront();
11603 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11604 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11605 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11610 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11611 if (wf
->execMask(lane
)) {
11612 vcc
.setBit(lane
, src0
[lane
] == src1
[lane
] ? 1 : 0);
11619 Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC
*iFmt
)
11620 : Inst_VOPC(iFmt
, "v_cmp_le_f64")
11624 } // Inst_VOPC__V_CMP_LE_F64
11626 Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64()
11628 } // ~Inst_VOPC__V_CMP_LE_F64
11630 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
11632 Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst
)
11634 Wavefront
*wf
= gpuDynInst
->wavefront();
11635 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11636 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11637 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11642 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11643 if (wf
->execMask(lane
)) {
11644 vcc
.setBit(lane
, src0
[lane
] <= src1
[lane
] ? 1 : 0);
11651 Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC
*iFmt
)
11652 : Inst_VOPC(iFmt
, "v_cmp_gt_f64")
11656 } // Inst_VOPC__V_CMP_GT_F64
11658 Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64()
11660 } // ~Inst_VOPC__V_CMP_GT_F64
11662 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
11664 Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst
)
11666 Wavefront
*wf
= gpuDynInst
->wavefront();
11667 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11668 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11669 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11674 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11675 if (wf
->execMask(lane
)) {
11676 vcc
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
11683 Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC
*iFmt
)
11684 : Inst_VOPC(iFmt
, "v_cmp_lg_f64")
11688 } // Inst_VOPC__V_CMP_LG_F64
11690 Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64()
11692 } // ~Inst_VOPC__V_CMP_LG_F64
11694 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
11696 Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst
)
11698 Wavefront
*wf
= gpuDynInst
->wavefront();
11699 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11700 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11701 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11706 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11707 if (wf
->execMask(lane
)) {
11708 vcc
.setBit(lane
, (src0
[lane
] < src1
[lane
]
11709 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
11716 Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC
*iFmt
)
11717 : Inst_VOPC(iFmt
, "v_cmp_ge_f64")
11721 } // Inst_VOPC__V_CMP_GE_F64
11723 Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64()
11725 } // ~Inst_VOPC__V_CMP_GE_F64
11727 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
11729 Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst
)
11731 Wavefront
*wf
= gpuDynInst
->wavefront();
11732 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11733 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11734 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11739 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11740 if (wf
->execMask(lane
)) {
11741 vcc
.setBit(lane
, src0
[lane
] >= src1
[lane
] ? 1 : 0);
11748 Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC
*iFmt
)
11749 : Inst_VOPC(iFmt
, "v_cmp_o_f64")
11753 } // Inst_VOPC__V_CMP_O_F64
11755 Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64()
11757 } // ~Inst_VOPC__V_CMP_O_F64
11759 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
11761 Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst
)
11763 Wavefront
*wf
= gpuDynInst
->wavefront();
11764 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11765 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11766 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11771 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11772 if (wf
->execMask(lane
)) {
11773 vcc
.setBit(lane
, (!std::isnan(src0
[lane
])
11774 && !std::isnan(src1
[lane
])) ? 1 : 0);
11781 Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC
*iFmt
)
11782 : Inst_VOPC(iFmt
, "v_cmp_u_f64")
11786 } // Inst_VOPC__V_CMP_U_F64
11788 Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64()
11790 } // ~Inst_VOPC__V_CMP_U_F64
11792 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
11794 Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst
)
11796 Wavefront
*wf
= gpuDynInst
->wavefront();
11797 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11798 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11799 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11804 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11805 if (wf
->execMask(lane
)) {
11806 vcc
.setBit(lane
, (std::isnan(src0
[lane
])
11807 || std::isnan(src1
[lane
])) ? 1 : 0);
11814 Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC
*iFmt
)
11815 : Inst_VOPC(iFmt
, "v_cmp_nge_f64")
11819 } // Inst_VOPC__V_CMP_NGE_F64
11821 Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64()
11823 } // ~Inst_VOPC__V_CMP_NGE_F64
11825 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
11827 Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst
)
11829 Wavefront
*wf
= gpuDynInst
->wavefront();
11830 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11831 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11832 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11837 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11838 if (wf
->execMask(lane
)) {
11839 vcc
.setBit(lane
, !(src0
[lane
] >= src1
[lane
]) ? 1 : 0);
11846 Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC
*iFmt
)
11847 : Inst_VOPC(iFmt
, "v_cmp_nlg_f64")
11851 } // Inst_VOPC__V_CMP_NLG_F64
11853 Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64()
11855 } // ~Inst_VOPC__V_CMP_NLG_F64
11857 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
11859 Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst
)
11861 Wavefront
*wf
= gpuDynInst
->wavefront();
11862 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11863 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11864 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11869 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11870 if (wf
->execMask(lane
)) {
11871 vcc
.setBit(lane
, !(src0
[lane
] < src1
[lane
]
11872 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
11879 Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC
*iFmt
)
11880 : Inst_VOPC(iFmt
, "v_cmp_ngt_f64")
11884 } // Inst_VOPC__V_CMP_NGT_F64
11886 Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64()
11888 } // ~Inst_VOPC__V_CMP_NGT_F64
11890 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
11892 Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst
)
11894 Wavefront
*wf
= gpuDynInst
->wavefront();
11895 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11896 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11897 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11902 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11903 if (wf
->execMask(lane
)) {
11904 vcc
.setBit(lane
, !(src0
[lane
] > src1
[lane
]) ? 1 : 0);
11911 Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC
*iFmt
)
11912 : Inst_VOPC(iFmt
, "v_cmp_nle_f64")
11916 } // Inst_VOPC__V_CMP_NLE_F64
11918 Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64()
11920 } // ~Inst_VOPC__V_CMP_NLE_F64
11922 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
11924 Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst
)
11926 Wavefront
*wf
= gpuDynInst
->wavefront();
11927 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11928 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11929 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11934 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11935 if (wf
->execMask(lane
)) {
11936 vcc
.setBit(lane
, !(src0
[lane
] <= src1
[lane
]) ? 1 : 0);
11943 Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC
*iFmt
)
11944 : Inst_VOPC(iFmt
, "v_cmp_neq_f64")
11948 } // Inst_VOPC__V_CMP_NEQ_F64
11950 Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64()
11952 } // ~Inst_VOPC__V_CMP_NEQ_F64
11954 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
11956 Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst
)
11958 Wavefront
*wf
= gpuDynInst
->wavefront();
11959 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11960 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11961 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11966 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11967 if (wf
->execMask(lane
)) {
11968 vcc
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
11975 Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC
*iFmt
)
11976 : Inst_VOPC(iFmt
, "v_cmp_nlt_f64")
11980 } // Inst_VOPC__V_CMP_NLT_F64
11982 Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64()
11984 } // ~Inst_VOPC__V_CMP_NLT_F64
11986 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
11988 Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst
)
11990 Wavefront
*wf
= gpuDynInst
->wavefront();
11991 ConstVecOperandF64
src0(gpuDynInst
, instData
.SRC0
);
11992 ConstVecOperandF64
src1(gpuDynInst
, instData
.VSRC1
);
11993 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
11998 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
11999 if (wf
->execMask(lane
)) {
12000 vcc
.setBit(lane
, !(src0
[lane
] < src1
[lane
]) ? 1 : 0);
12007 Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC
*iFmt
)
12008 : Inst_VOPC(iFmt
, "v_cmp_tru_f64")
12012 } // Inst_VOPC__V_CMP_TRU_F64
12014 Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64()
12016 } // ~Inst_VOPC__V_CMP_TRU_F64
12018 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
12020 Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst
)
12022 Wavefront
*wf
= gpuDynInst
->wavefront();
12023 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
12025 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
12026 if (wf
->execMask(lane
)) {
12027 vcc
.setBit(lane
, 1);

    Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_F_F64

    Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64()
    {
    } // ~Inst_VOPC__V_CMPX_F_F64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
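
    // The v_cmpx_* variants repeat the v_cmp_* pattern but additionally
    // copy the resulting lane mask into the wavefront's EXEC mask
    // (wf->execMask() = vcc.rawData()), so that subsequent vector
    // instructions execute only on the lanes where the comparison held.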

    Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LT_F64

    Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_F64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_EQ_F64

    Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_F64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LE_F64

    Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_F64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_GT_F64

    Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_F64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_LG_F64

    Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64()
    {
    } // ~Inst_VOPC__V_CMPX_LG_F64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_GE_F64

    Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_F64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_o_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_O_F64

    Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64()
    {
    } // ~Inst_VOPC__V_CMPX_O_F64

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_u_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_U_F64

    Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64()
    {
    } // ~Inst_VOPC__V_CMPX_U_F64

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
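
    // v_cmpx_o_f64 ("ordered") sets a lane's bit only when neither operand
    // is NaN; v_cmpx_u_f64 ("unordered") is its complement, setting the bit
    // when either operand is NaN.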

    Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nge_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NGE_F64

    Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NGE_F64

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlg_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLG_F64

    Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLG_F64

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ngt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NGT_F64

    Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NGT_F64

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nle_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLE_F64

    Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLE_F64

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_neq_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NEQ_F64

    Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NEQ_F64

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_nlt_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_NLT_F64

    Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64()
    {
    } // ~Inst_VOPC__V_CMPX_NLT_F64

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_tru_f64")
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOPC__V_CMPX_TRU_F64

    Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64()
    {
    } // ~Inst_VOPC__V_CMPX_TRU_F64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
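
    // The remaining compares repeat the same template for 16- and 32-bit
    // integer data. Signed (i16/i32) and unsigned (u16/u32) variants differ
    // only in the vector operand type, which determines how the register
    // bits are interpreted by the C++ comparison operators; "<>" in the ISA
    // pseudocode means "not equal" and maps to operator!=.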

    Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I16

    Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16()
    {
    } // ~Inst_VOPC__V_CMP_F_I16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I16

    Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16()
    {
    } // ~Inst_VOPC__V_CMP_LT_I16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I16

    Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I16

    Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16()
    {
    } // ~Inst_VOPC__V_CMP_LE_I16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I16

    Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16()
    {
    } // ~Inst_VOPC__V_CMP_GT_I16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I16

    Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16()
    {
    } // ~Inst_VOPC__V_CMP_NE_I16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I16

    Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16()
    {
    } // ~Inst_VOPC__V_CMP_GE_I16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I16

    Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16()
    {
    } // ~Inst_VOPC__V_CMP_T_I16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U16

    Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16()
    {
    } // ~Inst_VOPC__V_CMP_F_U16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U16

    Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16()
    {
    } // ~Inst_VOPC__V_CMP_LT_U16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U16

    Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U16

    Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16()
    {
    } // ~Inst_VOPC__V_CMP_LE_U16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U16

    Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16()
    {
    } // ~Inst_VOPC__V_CMP_GT_U16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U16

    Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16()
    {
    } // ~Inst_VOPC__V_CMP_NE_U16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U16

    Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16()
    {
    } // ~Inst_VOPC__V_CMP_GE_U16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U16

    Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16()
    {
    } // ~Inst_VOPC__V_CMP_T_U16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute
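
    // Hypothetical usage sketch: a compiler can lower divergent control
    // flow such as "if (v0 < v1) { ... }" onto the cmpx forms, letting the
    // comparison itself deactivate the failing lanes:
    //     v_cmpx_lt_i16 vcc, v0, v1  ; vcc = per-lane (v0 < v1), EXEC = vcc
    //     ...                        ; "then" block runs on passing lanes
    // The instruction operands above are illustrative, not taken from any
    // particular compiler's output.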

    Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I16

    Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16()
    {
    } // ~Inst_VOPC__V_CMPX_F_I16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I16

    Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I16

    Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I16

    Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I16

    Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I16

    Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I16

    Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I16

    Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16()
    {
    } // ~Inst_VOPC__V_CMPX_T_I16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_U16

    Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16()
    {
    } // ~Inst_VOPC__V_CMPX_F_U16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U16

    Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U16

    Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U16

    Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U16

    Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U16

    Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U16

    Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u16")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U16

    Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16()
    {
    } // ~Inst_VOPC__V_CMPX_T_U16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
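
    // The 32-bit forms below are identical to the 16-bit ones apart from
    // operand width; VOPC always packs one result bit per lane into the
    // 64-bit VCC (and, for cmpx, EXEC) mask regardless of the compared type.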

    Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I32

    Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32()
    {
    } // ~Inst_VOPC__V_CMP_F_I32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I32

    Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32()
    {
    } // ~Inst_VOPC__V_CMP_LT_I32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I32

    Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I32

    Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32()
    {
    } // ~Inst_VOPC__V_CMP_LE_I32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I32

    Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32()
    {
    } // ~Inst_VOPC__V_CMP_GT_I32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I32

    Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32()
    {
    } // ~Inst_VOPC__V_CMP_NE_I32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I32

    Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32()
    {
    } // ~Inst_VOPC__V_CMP_GE_I32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I32

    Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32()
    {
    } // ~Inst_VOPC__V_CMP_T_I32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U32

    Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32()
    {
    } // ~Inst_VOPC__V_CMP_F_U32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U32

    Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32()
    {
    } // ~Inst_VOPC__V_CMP_LT_U32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U32

    Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U32

    Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32()
    {
    } // ~Inst_VOPC__V_CMP_LE_U32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U32

    Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32()
    {
    } // ~Inst_VOPC__V_CMP_GT_U32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U32

    Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32()
    {
    } // ~Inst_VOPC__V_CMP_NE_U32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U32

    Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32()
    {
    } // ~Inst_VOPC__V_CMP_GE_U32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U32

    Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32()
    {
    } // ~Inst_VOPC__V_CMP_T_U32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I32

    Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32()
    {
    } // ~Inst_VOPC__V_CMPX_F_I32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I32

    Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I32

    Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I32

    Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I32

    Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I32

    Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I32

    Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I32

    Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32()
    {
    } // ~Inst_VOPC__V_CMPX_T_I32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute
14248 Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC
*iFmt
)
14249 : Inst_VOPC(iFmt
, "v_cmpx_f_u32")
14252 } // Inst_VOPC__V_CMPX_F_U32
14254 Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32()
14256 } // ~Inst_VOPC__V_CMPX_F_U32
14258 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
14260 Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst
)
14262 Wavefront
*wf
= gpuDynInst
->wavefront();
14263 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14265 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14266 if (wf
->execMask(lane
)) {
14267 vcc
.setBit(lane
, 0);
14271 wf
->execMask() = vcc
.rawData();
14275 Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC
*iFmt
)
14276 : Inst_VOPC(iFmt
, "v_cmpx_lt_u32")
14279 } // Inst_VOPC__V_CMPX_LT_U32
14281 Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32()
14283 } // ~Inst_VOPC__V_CMPX_LT_U32
14285 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14287 Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst
)
14289 Wavefront
*wf
= gpuDynInst
->wavefront();
14290 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14291 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14292 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14297 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14298 if (wf
->execMask(lane
)) {
14299 vcc
.setBit(lane
, src0
[lane
] < src1
[lane
] ? 1 : 0);
14303 wf
->execMask() = vcc
.rawData();
14307 Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC
*iFmt
)
14308 : Inst_VOPC(iFmt
, "v_cmpx_eq_u32")
14311 } // Inst_VOPC__V_CMPX_EQ_U32
14313 Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32()
14315 } // ~Inst_VOPC__V_CMPX_EQ_U32
14317 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14319 Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst
)
14321 Wavefront
*wf
= gpuDynInst
->wavefront();
14322 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14323 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14324 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14329 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14330 if (wf
->execMask(lane
)) {
14331 vcc
.setBit(lane
, src0
[lane
] == src1
[lane
] ? 1 : 0);
14335 wf
->execMask() = vcc
.rawData();
14339 Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC
*iFmt
)
14340 : Inst_VOPC(iFmt
, "v_cmpx_le_u32")
14343 } // Inst_VOPC__V_CMPX_LE_U32
14345 Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32()
14347 } // ~Inst_VOPC__V_CMPX_LE_U32
14349 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14351 Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst
)
14353 Wavefront
*wf
= gpuDynInst
->wavefront();
14354 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14355 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14356 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14361 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14362 if (wf
->execMask(lane
)) {
14363 vcc
.setBit(lane
, src0
[lane
] <= src1
[lane
] ? 1 : 0);
14367 wf
->execMask() = vcc
.rawData();
14371 Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC
*iFmt
)
14372 : Inst_VOPC(iFmt
, "v_cmpx_gt_u32")
14375 } // Inst_VOPC__V_CMPX_GT_U32
14377 Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32()
14379 } // ~Inst_VOPC__V_CMPX_GT_U32
14381 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14383 Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst
)
14385 Wavefront
*wf
= gpuDynInst
->wavefront();
14386 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14387 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14388 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14393 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14394 if (wf
->execMask(lane
)) {
14395 vcc
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
14399 wf
->execMask() = vcc
.rawData();
14403 Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC
*iFmt
)
14404 : Inst_VOPC(iFmt
, "v_cmpx_ne_u32")
14407 } // Inst_VOPC__V_CMPX_NE_U32
14409 Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32()
14411 } // ~Inst_VOPC__V_CMPX_NE_U32
14413 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
14415 Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst
)
14417 Wavefront
*wf
= gpuDynInst
->wavefront();
14418 ConstVecOperandU32
src0(gpuDynInst
, instData
.SRC0
);
14419 ConstVecOperandU32
src1(gpuDynInst
, instData
.VSRC1
);
14420 ScalarOperandU64
vcc(gpuDynInst
, REG_VCC_LO
);
14425 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
14426 if (wf
->execMask(lane
)) {
14427 vcc
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
14431 wf
->execMask() = vcc
.rawData();
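
    // In the ISA comments here, "S0 <> S1" is the manual's notation for
    // "not equal"; the implementation simply uses C++ operator!= on the
    // lane values.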

    Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U32

    Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u32")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U32

    Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32()
    {
    } // ~Inst_VOPC__V_CMPX_T_U32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_I64

    Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64()
    {
    } // ~Inst_VOPC__V_CMP_F_I64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_I64

    Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64()
    {
    } // ~Inst_VOPC__V_CMP_LT_I64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_I64

    Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_I64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_I64

    Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64()
    {
    } // ~Inst_VOPC__V_CMP_LE_I64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_I64

    Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64()
    {
    } // ~Inst_VOPC__V_CMP_GT_I64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_I64

    Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64()
    {
    } // ~Inst_VOPC__V_CMP_NE_I64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_I64

    Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64()
    {
    } // ~Inst_VOPC__V_CMP_GE_I64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_I64

    Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64()
    {
    } // ~Inst_VOPC__V_CMP_T_I64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_f_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_F_U64

    Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64()
    {
    } // ~Inst_VOPC__V_CMP_F_U64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_lt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LT_U64

    Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64()
    {
    } // ~Inst_VOPC__V_CMP_LT_U64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_eq_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_EQ_U64

    Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64()
    {
    } // ~Inst_VOPC__V_CMP_EQ_U64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_le_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_LE_U64

    Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64()
    {
    } // ~Inst_VOPC__V_CMP_LE_U64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_gt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GT_U64

    Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64()
    {
    } // ~Inst_VOPC__V_CMP_GT_U64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ne_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_NE_U64

    Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64()
    {
    } // ~Inst_VOPC__V_CMP_NE_U64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_ge_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_GE_U64

    Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64()
    {
    } // ~Inst_VOPC__V_CMP_GE_U64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        vcc.write();
    } // execute

    Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmp_t_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMP_T_U64

    Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64()
    {
    } // ~Inst_VOPC__V_CMP_T_U64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        vcc.write();
    } // execute
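
    // The F ("always false") and T ("always true") variants ignore their
    // sources entirely and just clear or set the result bit for each active
    // lane; they presumably exist to fill out the power-of-two compare
    // opcode space rather than for any computational use.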

    Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_I64

    Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64()
    {
    } // ~Inst_VOPC__V_CMPX_F_I64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_I64

    Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_I64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_I64

    Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_I64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_I64

    Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_I64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_I64

    Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_I64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_I64

    Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_NE_I64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_I64

    Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_I64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_i64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_I64

    Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64()
    {
    } // ~Inst_VOPC__V_CMPX_T_I64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_f_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_F_U64

    Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64()
    {
    } // ~Inst_VOPC__V_CMPX_F_U64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_lt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LT_U64

    Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64()
    {
    } // ~Inst_VOPC__V_CMPX_LT_U64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_eq_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_EQ_U64

    Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64()
    {
    } // ~Inst_VOPC__V_CMPX_EQ_U64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_le_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_LE_U64

    Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_LE_U64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_gt_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GT_U64

    Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64()
    {
    } // ~Inst_VOPC__V_CMPX_GT_U64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ne_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_NE_U64

    Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_NE_U64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_ge_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_GE_U64

    Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64()
    {
    } // ~Inst_VOPC__V_CMPX_GE_U64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt)
        : Inst_VOPC(iFmt, "v_cmpx_t_u64")
    {
        setFlag(ALU);
    } // Inst_VOPC__V_CMPX_T_U64

    Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64()
    {
    } // ~Inst_VOPC__V_CMPX_T_U64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, 1);
            }
        }

        wf->execMask() = vcc.rawData();
        vcc.write();
    } // execute

    Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
        InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p1_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P1_F32

    Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P1_F32

    // D.f = P10 * S.f + P0; parameter interpolation
    void
    Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
        InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_p2_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_P2_F32

    Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_P2_F32

    // D.f = P20 * S.f + D.f; parameter interpolation
    void
    Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
        InFmt_VINTRP *iFmt)
        : Inst_VINTRP(iFmt, "v_interp_mov_f32")
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VINTRP__V_INTERP_MOV_F32

    Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
    {
    } // ~Inst_VINTRP__V_INTERP_MOV_F32

    void
    Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
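
    // The VINTRP opcodes above implement fixed-function parameter
    // interpolation for graphics shaders. This model is aimed at compute
    // kernels, so rather than risk silently wrong results they halt the
    // simulation with panicUnimplemented() if one is ever decoded.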

    Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_CLASS_F32

    Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F32

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    } // execute
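
    // Because this is the VOP3 encoding of the compare, the result mask
    // lands in the scalar register pair named by instData.VDST rather than
    // in the implicit VCC the VOPC encoding uses; that is why sdst above is
    // built from instData.VDST instead of REG_VCC_LO.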

    Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_CLASS_F32

    Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F32

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
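
    // All four CLASS_F* implementations repeat the same ten-way test. A
    // hypothetical refactoring (not part of this file) could hoist one
    // lane's test into a helper, assuming only the <cmath> classification
    // functions and the bits() utility already used above:
    //
    //     template<typename FP>
    //     bool
    //     classMatch(FP val, uint32_t mask)
    //     {
    //         // bits 0/1: signaling or quiet NaN (not distinguished here)
    //         if ((bits(mask, 0) || bits(mask, 1)) && std::isnan(val))
    //             return true;
    //         bool neg = std::signbit(val);
    //         switch (std::fpclassify(val)) {
    //           case FP_INFINITE:  return bits(mask, neg ? 2 : 9);
    //           case FP_NORMAL:    return bits(mask, neg ? 3 : 8);
    //           case FP_SUBNORMAL: return bits(mask, neg ? 4 : 7);
    //           case FP_ZERO:      return bits(mask, neg ? 5 : 6);
    //           default:           return false;
    //         }
    //     }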

    Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_CLASS_F64

    Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F64

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_CLASS_F64

    Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F64

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.d
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
                    // is NaN
                    if (std::isnan(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 2)) {
                    // is -infinity
                    if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 3)) {
                    // is -normal
                    if (std::isnormal(src0[lane])
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 4)) {
                    // is -denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 5)) {
                    // is -zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 6)) {
                    // is +zero
                    if (std::fpclassify(src0[lane]) == FP_ZERO
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 7)) {
                    // is +denormal
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 8)) {
                    // is +normal
                    if (std::isnormal(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
                if (bits(src1[lane], 9)) {
                    // is +infinity
                    if (std::isinf(src0[lane])
                        && !std::signbit(src0[lane])) {
                        sdst.setBit(lane, 1);
                        continue;
                    }
                }
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_CLASS_F16

    Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMP_CLASS_F16

    // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_class_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_CLASS_F16

    Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
    {
    } // ~Inst_VOP3__V_CMPX_CLASS_F16

    // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f16
    // The function reports true if the floating point value is any of the
    // numeric types selected in S1.u according to the following list:
    // S1.u[0] -- value is a signaling NaN.
    // S1.u[1] -- value is a quiet NaN.
    // S1.u[2] -- value is negative infinity.
    // S1.u[3] -- value is a negative normal value.
    // S1.u[4] -- value is a negative denormal value.
    // S1.u[5] -- value is negative zero.
    // S1.u[6] -- value is positive zero.
    // S1.u[7] -- value is a positive denormal value.
    // S1.u[8] -- value is a positive normal value.
    // S1.u[9] -- value is positive infinity.
    void
    Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
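
    // Note that the half-precision (F16) compare family below is almost
    // entirely stubbed out with panicUnimplemented(); only the trivial
    // v_cmp_tru_f16, v_cmpx_f_f16, and v_cmpx_tru_f16 variants, which do
    // not need an F16 datapath at all, are implemented.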

    Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_F_F16

    Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
    {
    } // ~Inst_VOP3__V_CMP_F_F16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LT_F16

    Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
    {
    } // ~Inst_VOP3__V_CMP_LT_F16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_EQ_F16

    Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LE_F16

    Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
    {
    } // ~Inst_VOP3__V_CMP_LE_F16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GT_F16

    Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
    {
    } // ~Inst_VOP3__V_CMP_GT_F16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_LG_F16

    Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
    {
    } // ~Inst_VOP3__V_CMP_LG_F16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_GE_F16

    Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
    {
    } // ~Inst_VOP3__V_CMP_GE_F16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_O_F16

    Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16()
    {
    } // ~Inst_VOP3__V_CMP_O_F16

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_u_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_U_F16

    Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16()
    {
    } // ~Inst_VOP3__V_CMP_U_F16

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NGE_F16

    Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F16

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLG_F16

    Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F16

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NGT_F16

    Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F16

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLE_F16

    Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F16

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NEQ_F16

    Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F16

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_NLT_F16

    Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F16

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMP_TRU_F16

    Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_f16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_F16

    Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16()
    {
    } // ~Inst_VOP3__V_CMPX_F_F16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LT_F16

    Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_F16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_EQ_F16

    Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_F16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LE_F16

    Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GT_F16

    Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_LG_F16

    Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_GE_F16

    Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_O_F16

    Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16()
    {
    } // ~Inst_VOP3__V_CMPX_O_F16

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_U_F16

    Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16()
    {
    } // ~Inst_VOP3__V_CMPX_U_F16

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGE_F16

    Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F16

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLG_F16

    Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F16

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NGT_F16

    Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F16

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLE_F16

    Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F16

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NEQ_F16

    Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F16

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_NLT_F16

    Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F16

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f16", true)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CMPX_TRU_F16

    Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_F_F32

    Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32()
    {
    } // ~Inst_VOP3__V_CMP_F_F32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LT_F32

    Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32()
    {
    } // ~Inst_VOP3__V_CMP_LT_F32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
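
    // These F32 compares rely on the native C++ relational operators, so a
    // NaN in either source makes the ordered predicates (lt, eq, le, gt,
    // ge) evaluate false for that lane, consistent with IEEE-754
    // ordered-compare semantics.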

    Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_EQ_F32

    Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LE_F32

    Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32()
    {
    } // ~Inst_VOP3__V_CMP_LE_F32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_GT_F32

    Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32()
    {
    } // ~Inst_VOP3__V_CMP_GT_F32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_LG_F32

    Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32()
    {
    } // ~Inst_VOP3__V_CMP_LG_F32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_GE_F32

    Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32()
    {
    } // ~Inst_VOP3__V_CMP_GE_F32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt
, "v_cmp_o_f32", true)
16906 } // Inst_VOP3__V_CMP_O_F32
16908 Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32()
16910 } // ~Inst_VOP3__V_CMP_O_F32
16912 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
16914 Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst
)
16916 Wavefront
*wf
= gpuDynInst
->wavefront();
16917 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
16918 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
16919 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
16924 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
16925 if (wf
->execMask(lane
)) {
16926 sdst
.setBit(lane
, (!std::isnan(src0
[lane
])
16927 && !std::isnan(src1
[lane
])) ? 1 : 0);
16934 Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3
*iFmt
)
16935 : Inst_VOP3(iFmt
, "v_cmp_u_f32", true)
16939 } // Inst_VOP3__V_CMP_U_F32
16941 Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32()
16943 } // ~Inst_VOP3__V_CMP_U_F32
16945 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
16947 Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst
)
16949 Wavefront
*wf
= gpuDynInst
->wavefront();
16950 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
16951 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
16952 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
16957 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
16958 if (wf
->execMask(lane
)) {
16959 sdst
.setBit(lane
, (std::isnan(src0
[lane
])
16960 || std::isnan(src1
[lane
])) ? 1 : 0);
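
    /**
     * Ordered/unordered semantics: v_cmp_o_* is true iff neither source is
     * a NaN, and v_cmp_u_* is true iff either source is a NaN. The negated
     * compares that follow (nge, nlg, ngt, nle, neq, nlt) are the logical
     * inverses of the corresponding ordered compares, so they evaluate to
     * true whenever either input is a NaN; this matches IEEE-754 behavior
     * for the C++ relational operators used here, which return false on
     * unordered operands.
     */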

    Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NGE_F32

    Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F32

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLG_F32

    Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F32

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NGT_F32

    Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F32

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLE_F32

    Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F32

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NEQ_F32

    Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F32

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_NLT_F32

    Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F32

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMP_TRU_F32

    Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
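
    /**
     * The v_cmpx_* variants below compute the same per-lane results as
     * their v_cmp_* counterparts and additionally copy the result vector
     * into the EXEC mask, so subsequent vector instructions execute only
     * in lanes where the compare was true. The extra step is simply
     *
     *     wf->execMask() = sdst.rawData();
     *
     * performed after the per-lane loop and before the destination write.
     */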

    Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_F_F32

    Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32()
    {
    } // ~Inst_VOP3__V_CMPX_F_F32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LT_F32

    Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_F32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_EQ_F32

    Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_F32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LE_F32

    Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_F32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_GT_F32

    Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_GT_F32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_LG_F32

    Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32()
    {
    } // ~Inst_VOP3__V_CMPX_LG_F32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_GE_F32

    Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_GE_F32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_o_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_O_F32

    Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32()
    {
    } // ~Inst_VOP3__V_CMPX_O_F32

    // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_u_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_U_F32

    Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32()
    {
    } // ~Inst_VOP3__V_CMPX_U_F32

    // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
    // encoding.
    void
    Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nge_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NGE_F32

    Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NGE_F32

    // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlg_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLG_F32

    Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLG_F32

    // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ngt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NGT_F32

    Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NGT_F32

    // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nle_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLE_F32

    Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLE_F32

    // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_neq_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NEQ_F32

    Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NEQ_F32

    // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_NLT_F32

    Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F32

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f32", true)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CMPX_TRU_F32

    Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
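
    /**
     * The F64 compares below additionally honor the VOP3 input modifiers:
     * ABS bit 0 / NEG bit 0 apply to SRC0, and ABS bit 1 / NEG bit 1 apply
     * to SRC1. Bit 2 would select a third source operand, which compares
     * do not have, so it is asserted to be clear. A sketch of the decode,
     * using the src0/src1 operands declared in the bodies below:
     *
     *     if (instData.ABS & 0x1) { src0.absModifier(); }
     *     if (instData.ABS & 0x2) { src1.absModifier(); }
     *     if (extData.NEG & 0x1) { src0.negModifier(); }
     *     if (extData.NEG & 0x2) { src1.negModifier(); }
     */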

    Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_F_F64

    Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64()
    {
    } // ~Inst_VOP3__V_CMP_F_F64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LT_F64

    Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64()
    {
    } // ~Inst_VOP3__V_CMP_LT_F64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_EQ_F64

    Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_F64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LE_F64

    Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64()
    {
    } // ~Inst_VOP3__V_CMP_LE_F64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_GT_F64

    Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64()
    {
    } // ~Inst_VOP3__V_CMP_GT_F64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_LG_F64

    Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64()
    {
    } // ~Inst_VOP3__V_CMP_LG_F64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_GE_F64

    Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64()
    {
    } // ~Inst_VOP3__V_CMP_GE_F64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_o_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_O_F64

    Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64()
    {
    } // ~Inst_VOP3__V_CMP_O_F64

    // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (!std::isnan(src0[lane])
                    && !std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_u_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_U_F64

    Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64()
    {
    } // ~Inst_VOP3__V_CMP_U_F64

    // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, (std::isnan(src0[lane])
                    || std::isnan(src1[lane])) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nge_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NGE_F64

    Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64()
    {
    } // ~Inst_VOP3__V_CMP_NGE_F64

    // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlg_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLG_F64

    Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLG_F64

    // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]
                    || src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ngt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NGT_F64

    Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64()
    {
    } // ~Inst_VOP3__V_CMP_NGT_F64

    // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nle_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLE_F64

    Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLE_F64

    // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_neq_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NEQ_F64

    Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64()
    {
    } // ~Inst_VOP3__V_CMP_NEQ_F64

    // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_nlt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_NLT_F64

    Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64()
    {
    } // ~Inst_VOP3__V_CMP_NLT_F64

    // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_tru_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMP_TRU_F64

    Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64()
    {
    } // ~Inst_VOP3__V_CMP_TRU_F64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
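
    /**
     * The 64b cmpx variants below combine the two behaviors already seen:
     * the ABS/NEG input modifiers are applied to the F64 sources first,
     * then the per-lane results are written both to the scalar destination
     * and to the EXEC mask.
     */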
18561 Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64(
18563 : Inst_VOP3(iFmt
, "v_cmpx_f_f64", true)
18567 } // Inst_VOP3__V_CMPX_F_F64
18569 Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64()
18571 } // ~Inst_VOP3__V_CMPX_F_F64
18573 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
18575 Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst
)
18577 Wavefront
*wf
= gpuDynInst
->wavefront();
18578 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18580 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18581 if (wf
->execMask(lane
)) {
18582 sdst
.setBit(lane
, 0);
18586 wf
->execMask() = sdst
.rawData();
18590 Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64(
18592 : Inst_VOP3(iFmt
, "v_cmpx_lt_f64", true)
18596 } // Inst_VOP3__V_CMPX_LT_F64
18598 Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64()
18600 } // ~Inst_VOP3__V_CMPX_LT_F64
18602 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
18604 Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst
)
18606 Wavefront
*wf
= gpuDynInst
->wavefront();
18607 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18608 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18609 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18614 if (instData
.ABS
& 0x1) {
18615 src0
.absModifier();
18618 if (instData
.ABS
& 0x2) {
18619 src1
.absModifier();
18622 if (extData
.NEG
& 0x1) {
18623 src0
.negModifier();
18626 if (extData
.NEG
& 0x2) {
18627 src1
.negModifier();
18631 * input modifiers are supported by FP operations only
18633 assert(!(instData
.ABS
& 0x4));
18634 assert(!(extData
.NEG
& 0x4));
18636 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18637 if (wf
->execMask(lane
)) {
18638 sdst
.setBit(lane
, src0
[lane
] < src1
[lane
] ? 1 : 0);
18642 wf
->execMask() = sdst
.rawData();
18646 Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64(
18648 : Inst_VOP3(iFmt
, "v_cmpx_eq_f64", true)
18652 } // Inst_VOP3__V_CMPX_EQ_F64
18654 Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64()
18656 } // ~Inst_VOP3__V_CMPX_EQ_F64
18658 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
18660 Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst
)
18662 Wavefront
*wf
= gpuDynInst
->wavefront();
18663 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18664 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18665 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18670 if (instData
.ABS
& 0x1) {
18671 src0
.absModifier();
18674 if (instData
.ABS
& 0x2) {
18675 src1
.absModifier();
18678 if (extData
.NEG
& 0x1) {
18679 src0
.negModifier();
18682 if (extData
.NEG
& 0x2) {
18683 src1
.negModifier();
18687 * input modifiers are supported by FP operations only
18689 assert(!(instData
.ABS
& 0x4));
18690 assert(!(extData
.NEG
& 0x4));
18692 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18693 if (wf
->execMask(lane
)) {
18694 sdst
.setBit(lane
, src0
[lane
] == src1
[lane
] ? 1 : 0);
18698 wf
->execMask() = sdst
.rawData();
18702 Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64(
18704 : Inst_VOP3(iFmt
, "v_cmpx_le_f64", true)
18708 } // Inst_VOP3__V_CMPX_LE_F64
18710 Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64()
18712 } // ~Inst_VOP3__V_CMPX_LE_F64
18714 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
18716 Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst
)
18718 Wavefront
*wf
= gpuDynInst
->wavefront();
18719 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18720 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18721 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18726 if (instData
.ABS
& 0x1) {
18727 src0
.absModifier();
18730 if (instData
.ABS
& 0x2) {
18731 src1
.absModifier();
18734 if (extData
.NEG
& 0x1) {
18735 src0
.negModifier();
18738 if (extData
.NEG
& 0x2) {
18739 src1
.negModifier();
18743 * input modifiers are supported by FP operations only
18745 assert(!(instData
.ABS
& 0x4));
18746 assert(!(extData
.NEG
& 0x4));
18748 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18749 if (wf
->execMask(lane
)) {
18750 sdst
.setBit(lane
, src0
[lane
] <= src1
[lane
] ? 1 : 0);
18754 wf
->execMask() = sdst
.rawData();
18758 Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64(
18760 : Inst_VOP3(iFmt
, "v_cmpx_gt_f64", true)
18764 } // Inst_VOP3__V_CMPX_GT_F64
18766 Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64()
18768 } // ~Inst_VOP3__V_CMPX_GT_F64
18770 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
18772 Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst
)
18774 Wavefront
*wf
= gpuDynInst
->wavefront();
18775 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18776 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18777 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18782 if (instData
.ABS
& 0x1) {
18783 src0
.absModifier();
18786 if (instData
.ABS
& 0x2) {
18787 src1
.absModifier();
18790 if (extData
.NEG
& 0x1) {
18791 src0
.negModifier();
18794 if (extData
.NEG
& 0x2) {
18795 src1
.negModifier();
18799 * input modifiers are supported by FP operations only
18801 assert(!(instData
.ABS
& 0x4));
18802 assert(!(extData
.NEG
& 0x4));
18804 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18805 if (wf
->execMask(lane
)) {
18806 sdst
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
18810 wf
->execMask() = sdst
.rawData();
18814 Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64(
18816 : Inst_VOP3(iFmt
, "v_cmpx_lg_f64", true)
18820 } // Inst_VOP3__V_CMPX_LG_F64
18822 Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64()
18824 } // ~Inst_VOP3__V_CMPX_LG_F64
18826 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
18828 Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst
)
18830 Wavefront
*wf
= gpuDynInst
->wavefront();
18831 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18832 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18833 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18838 if (instData
.ABS
& 0x1) {
18839 src0
.absModifier();
18842 if (instData
.ABS
& 0x2) {
18843 src1
.absModifier();
18846 if (extData
.NEG
& 0x1) {
18847 src0
.negModifier();
18850 if (extData
.NEG
& 0x2) {
18851 src1
.negModifier();
18855 * input modifiers are supported by FP operations only
18857 assert(!(instData
.ABS
& 0x4));
18858 assert(!(extData
.NEG
& 0x4));
18860 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18861 if (wf
->execMask(lane
)) {
18862 sdst
.setBit(lane
, (src0
[lane
] < src1
[lane
]
18863 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
18867 wf
->execMask() = sdst
.rawData();
18871 Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64(
18873 : Inst_VOP3(iFmt
, "v_cmpx_ge_f64", true)
18877 } // Inst_VOP3__V_CMPX_GE_F64
18879 Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64()
18881 } // ~Inst_VOP3__V_CMPX_GE_F64
18883 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
18885 Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst
)
18887 Wavefront
*wf
= gpuDynInst
->wavefront();
18888 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18889 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18890 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18895 if (instData
.ABS
& 0x1) {
18896 src0
.absModifier();
18899 if (instData
.ABS
& 0x2) {
18900 src1
.absModifier();
18903 if (extData
.NEG
& 0x1) {
18904 src0
.negModifier();
18907 if (extData
.NEG
& 0x2) {
18908 src1
.negModifier();
18912 * input modifiers are supported by FP operations only
18914 assert(!(instData
.ABS
& 0x4));
18915 assert(!(extData
.NEG
& 0x4));
18917 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18918 if (wf
->execMask(lane
)) {
18919 sdst
.setBit(lane
, src0
[lane
] >= src1
[lane
] ? 1 : 0);
18923 wf
->execMask() = sdst
.rawData();
18927 Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64(
18929 : Inst_VOP3(iFmt
, "v_cmpx_o_f64", true)
18933 } // Inst_VOP3__V_CMPX_O_F64
18935 Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64()
18937 } // ~Inst_VOP3__V_CMPX_O_F64
18939 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
18942 Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst
)
18944 Wavefront
*wf
= gpuDynInst
->wavefront();
18945 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
18946 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
18947 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
18952 if (instData
.ABS
& 0x1) {
18953 src0
.absModifier();
18956 if (instData
.ABS
& 0x2) {
18957 src1
.absModifier();
18960 if (extData
.NEG
& 0x1) {
18961 src0
.negModifier();
18964 if (extData
.NEG
& 0x2) {
18965 src1
.negModifier();
18969 * input modifiers are supported by FP operations only
18971 assert(!(instData
.ABS
& 0x4));
18972 assert(!(extData
.NEG
& 0x4));
18974 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
18975 if (wf
->execMask(lane
)) {
18976 sdst
.setBit(lane
, (!std::isnan(src0
[lane
])
18977 && !std::isnan(src1
[lane
])) ? 1 : 0);
18981 wf
->execMask() = sdst
.rawData();
18985 Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64(
18987 : Inst_VOP3(iFmt
, "v_cmpx_u_f64", true)
18991 } // Inst_VOP3__V_CMPX_U_F64
18993 Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64()
18995 } // ~Inst_VOP3__V_CMPX_U_F64
18997 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
19000 Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst
)
19002 Wavefront
*wf
= gpuDynInst
->wavefront();
19003 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19004 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19005 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19010 if (instData
.ABS
& 0x1) {
19011 src0
.absModifier();
19014 if (instData
.ABS
& 0x2) {
19015 src1
.absModifier();
19018 if (extData
.NEG
& 0x1) {
19019 src0
.negModifier();
19022 if (extData
.NEG
& 0x2) {
19023 src1
.negModifier();
19027 * input modifiers are supported by FP operations only
19029 assert(!(instData
.ABS
& 0x4));
19030 assert(!(extData
.NEG
& 0x4));
19032 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19033 if (wf
->execMask(lane
)) {
19034 sdst
.setBit(lane
, (std::isnan(src0
[lane
])
19035 || std::isnan(src1
[lane
])) ? 1 : 0);
19039 wf
->execMask() = sdst
.rawData();
19043 Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64(
19045 : Inst_VOP3(iFmt
, "v_cmpx_nge_f64", true)
19049 } // Inst_VOP3__V_CMPX_NGE_F64
19051 Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64()
19053 } // ~Inst_VOP3__V_CMPX_NGE_F64
19055 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
19057 Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst
)
19059 Wavefront
*wf
= gpuDynInst
->wavefront();
19060 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19061 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19062 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19067 if (instData
.ABS
& 0x1) {
19068 src0
.absModifier();
19071 if (instData
.ABS
& 0x2) {
19072 src1
.absModifier();
19075 if (extData
.NEG
& 0x1) {
19076 src0
.negModifier();
19079 if (extData
.NEG
& 0x2) {
19080 src1
.negModifier();
19084 * input modifiers are supported by FP operations only
19086 assert(!(instData
.ABS
& 0x4));
19087 assert(!(extData
.NEG
& 0x4));
19089 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19090 if (wf
->execMask(lane
)) {
19091 sdst
.setBit(lane
, !(src0
[lane
] >= src1
[lane
]) ? 1 : 0);
19095 wf
->execMask() = sdst
.rawData();
19099 Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64(
19101 : Inst_VOP3(iFmt
, "v_cmpx_nlg_f64", true)
19105 } // Inst_VOP3__V_CMPX_NLG_F64
19107 Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64()
19109 } // ~Inst_VOP3__V_CMPX_NLG_F64
19111 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
19113 Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst
)
19115 Wavefront
*wf
= gpuDynInst
->wavefront();
19116 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19117 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19118 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19123 if (instData
.ABS
& 0x1) {
19124 src0
.absModifier();
19127 if (instData
.ABS
& 0x2) {
19128 src1
.absModifier();
19131 if (extData
.NEG
& 0x1) {
19132 src0
.negModifier();
19135 if (extData
.NEG
& 0x2) {
19136 src1
.negModifier();
19140 * input modifiers are supported by FP operations only
19142 assert(!(instData
.ABS
& 0x4));
19143 assert(!(extData
.NEG
& 0x4));
19145 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19146 if (wf
->execMask(lane
)) {
19147 sdst
.setBit(lane
, !(src0
[lane
] < src1
[lane
]
19148 || src0
[lane
] > src1
[lane
]) ? 1 : 0);
19152 wf
->execMask() = sdst
.rawData();
19156 Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64(
19158 : Inst_VOP3(iFmt
, "v_cmpx_ngt_f64", true)
19162 } // Inst_VOP3__V_CMPX_NGT_F64
19164 Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64()
19166 } // ~Inst_VOP3__V_CMPX_NGT_F64
19168 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
19170 Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst
)
19172 Wavefront
*wf
= gpuDynInst
->wavefront();
19173 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19174 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19175 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19180 if (instData
.ABS
& 0x1) {
19181 src0
.absModifier();
19184 if (instData
.ABS
& 0x2) {
19185 src1
.absModifier();
19188 if (extData
.NEG
& 0x1) {
19189 src0
.negModifier();
19192 if (extData
.NEG
& 0x2) {
19193 src1
.negModifier();
19197 * input modifiers are supported by FP operations only
19199 assert(!(instData
.ABS
& 0x4));
19200 assert(!(extData
.NEG
& 0x4));
19202 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19203 if (wf
->execMask(lane
)) {
19204 sdst
.setBit(lane
, !(src0
[lane
] > src1
[lane
]) ? 1 : 0);
19208 wf
->execMask() = sdst
.rawData();
19212 Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64(
19214 : Inst_VOP3(iFmt
, "v_cmpx_nle_f64", true)
19218 } // Inst_VOP3__V_CMPX_NLE_F64
19220 Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64()
19222 } // ~Inst_VOP3__V_CMPX_NLE_F64
19224 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
19226 Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst
)
19228 Wavefront
*wf
= gpuDynInst
->wavefront();
19229 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19230 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19231 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19236 if (instData
.ABS
& 0x1) {
19237 src0
.absModifier();
19240 if (instData
.ABS
& 0x2) {
19241 src1
.absModifier();
19244 if (extData
.NEG
& 0x1) {
19245 src0
.negModifier();
19248 if (extData
.NEG
& 0x2) {
19249 src1
.negModifier();
19253 * input modifiers are supported by FP operations only
19255 assert(!(instData
.ABS
& 0x4));
19256 assert(!(extData
.NEG
& 0x4));
19258 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19259 if (wf
->execMask(lane
)) {
19260 sdst
.setBit(lane
, !(src0
[lane
] <= src1
[lane
]) ? 1 : 0);
19264 wf
->execMask() = sdst
.rawData();
19268 Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64(
19270 : Inst_VOP3(iFmt
, "v_cmpx_neq_f64", true)
19274 } // Inst_VOP3__V_CMPX_NEQ_F64
19276 Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64()
19278 } // ~Inst_VOP3__V_CMPX_NEQ_F64
19280 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
19282 Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst
)
19284 Wavefront
*wf
= gpuDynInst
->wavefront();
19285 ConstVecOperandF64
src0(gpuDynInst
, extData
.SRC0
);
19286 ConstVecOperandF64
src1(gpuDynInst
, extData
.SRC1
);
19287 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
19292 if (instData
.ABS
& 0x1) {
19293 src0
.absModifier();
19296 if (instData
.ABS
& 0x2) {
19297 src1
.absModifier();
19300 if (extData
.NEG
& 0x1) {
19301 src0
.negModifier();
19304 if (extData
.NEG
& 0x2) {
19305 src1
.negModifier();
19309 * input modifiers are supported by FP operations only
19311 assert(!(instData
.ABS
& 0x4));
19312 assert(!(extData
.NEG
& 0x4));
19314 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
19315 if (wf
->execMask(lane
)) {
19316 sdst
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
19320 wf
->execMask() = sdst
.rawData();
    Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_nlt_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_NLT_F64

    Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64()
    {
    } // ~Inst_VOP3__V_CMPX_NLT_F64

    // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_tru_f64", true)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CMPX_TRU_F64

    Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64()
    {
    } // ~Inst_VOP3__V_CMPX_TRU_F64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
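    /**
     * The integer compares that follow all share one per-lane pattern:
     * each active lane's predicate result is packed into one bit of the
     * 64-bit scalar destination (the condition mask). A minimal sketch of
     * that loop, with cmp standing in for the instruction-specific
     * predicate:
     *
     *     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
     *         if (wf->execMask(lane)) {
     *             sdst.setBit(lane, cmp(src0[lane], src1[lane]) ? 1 : 0);
     *         }
     *     }
     *
     * The CMPX variants additionally copy the finished mask into EXEC via
     * wf->execMask() = sdst.rawData().
     */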
    Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I16

    Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16()
    {
    } // ~Inst_VOP3__V_CMP_F_I16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I16

    Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16()
    {
    } // ~Inst_VOP3__V_CMP_LT_I16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I16

    Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I16

    Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16()
    {
    } // ~Inst_VOP3__V_CMP_LE_I16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I16

    Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16()
    {
    } // ~Inst_VOP3__V_CMP_GT_I16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I16

    Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16()
    {
    } // ~Inst_VOP3__V_CMP_NE_I16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I16

    Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16()
    {
    } // ~Inst_VOP3__V_CMP_GE_I16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I16

    Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16()
    {
    } // ~Inst_VOP3__V_CMP_T_I16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U16

    Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16()
    {
    } // ~Inst_VOP3__V_CMP_F_U16

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U16

    Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16()
    {
    } // ~Inst_VOP3__V_CMP_LT_U16

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U16

    Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U16

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U16

    Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16()
    {
    } // ~Inst_VOP3__V_CMP_LE_U16

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U16

    Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16()
    {
    } // ~Inst_VOP3__V_CMP_GT_U16

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U16

    Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16()
    {
    } // ~Inst_VOP3__V_CMP_NE_U16

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U16

    Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16()
    {
    } // ~Inst_VOP3__V_CMP_GE_U16

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U16

    Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16()
    {
    } // ~Inst_VOP3__V_CMP_T_U16

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
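    /**
     * V_CMP(X)_F_* and V_CMP(X)_T_* are the degenerate members of the
     * compare family: they ignore their sources and write a constant 0
     * ("always false") or 1 ("always true") for every active lane.
     */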
    Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I16

    Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16()
    {
    } // ~Inst_VOP3__V_CMPX_F_I16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I16

    Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I16

    Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I16

    Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I16

    Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I16

    Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I16

    Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I16

    Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16()
    {
    } // ~Inst_VOP3__V_CMPX_T_I16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U16

    Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16()
    {
    } // ~Inst_VOP3__V_CMPX_F_U16

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U16

    Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U16

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U16

    Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U16

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U16

    Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U16

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_U16

    Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
    {
    } // ~Inst_VOP3__V_CMPX_GT_U16

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_U16

    Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_NE_U16

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_U16

    Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
    {
    } // ~Inst_VOP3__V_CMPX_GE_U16

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u16", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U16

    Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16()
    {
    } // ~Inst_VOP3__V_CMPX_T_U16

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
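    /**
     * The 32-bit integer compares below mirror the 16-bit ones above;
     * only the operand type changes. As before, the asserts on
     * instData.ABS and extData.NEG document that VOP3 input modifiers
     * are defined for FP sources only.
     */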
    Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I32

    Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
    {
    } // ~Inst_VOP3__V_CMP_F_I32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I32

    Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
    {
    } // ~Inst_VOP3__V_CMP_LT_I32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I32

    Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I32

    Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
    {
    } // ~Inst_VOP3__V_CMP_LE_I32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I32

    Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
    {
    } // ~Inst_VOP3__V_CMP_GT_I32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I32

    Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
    {
    } // ~Inst_VOP3__V_CMP_NE_I32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I32

    Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
    {
    } // ~Inst_VOP3__V_CMP_GE_I32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I32

    Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
    {
    } // ~Inst_VOP3__V_CMP_T_I32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U32

    Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
    {
    } // ~Inst_VOP3__V_CMP_F_U32

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U32

    Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
    {
    } // ~Inst_VOP3__V_CMP_LT_U32

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U32

    Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U32

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U32

    Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32()
    {
    } // ~Inst_VOP3__V_CMP_LE_U32

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U32

    Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32()
    {
    } // ~Inst_VOP3__V_CMP_GT_U32

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U32

    Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32()
    {
    } // ~Inst_VOP3__V_CMP_NE_U32

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U32

    Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32()
    {
    } // ~Inst_VOP3__V_CMP_GE_U32

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U32

    Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32()
    {
    } // ~Inst_VOP3__V_CMP_T_U32

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I32

    Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32()
    {
    } // ~Inst_VOP3__V_CMPX_F_I32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I32

    Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I32

    Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I32

    Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I32

    Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I32

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I32

    Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I32

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I32

    Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I32

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I32

    Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32()
    {
    } // ~Inst_VOP3__V_CMPX_T_I32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U32

    Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32()
    {
    } // ~Inst_VOP3__V_CMPX_F_U32

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U32

    Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U32

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U32

    Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U32

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U32

    Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U32

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
21725 Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32(
21727 : Inst_VOP3(iFmt
, "v_cmpx_gt_u32", true)
21730 } // Inst_VOP3__V_CMPX_GT_U32
21732 Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32()
21734 } // ~Inst_VOP3__V_CMPX_GT_U32
21736 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21738 Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst
)
21740 Wavefront
*wf
= gpuDynInst
->wavefront();
21741 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
21742 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
21743 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
21749 * input modifiers are supported by FP operations only
21751 assert(!(instData
.ABS
& 0x1));
21752 assert(!(instData
.ABS
& 0x2));
21753 assert(!(instData
.ABS
& 0x4));
21754 assert(!(extData
.NEG
& 0x1));
21755 assert(!(extData
.NEG
& 0x2));
21756 assert(!(extData
.NEG
& 0x4));
21758 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
21759 if (wf
->execMask(lane
)) {
21760 sdst
.setBit(lane
, src0
[lane
] > src1
[lane
] ? 1 : 0);
21764 wf
->execMask() = sdst
.rawData();
21768 Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32(
21770 : Inst_VOP3(iFmt
, "v_cmpx_ne_u32", true)
21773 } // Inst_VOP3__V_CMPX_NE_U32
21775 Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32()
21777 } // ~Inst_VOP3__V_CMPX_NE_U32
21779 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21781 Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst
)
21783 Wavefront
*wf
= gpuDynInst
->wavefront();
21784 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
21785 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
21786 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
21792 * input modifiers are supported by FP operations only
21794 assert(!(instData
.ABS
& 0x1));
21795 assert(!(instData
.ABS
& 0x2));
21796 assert(!(instData
.ABS
& 0x4));
21797 assert(!(extData
.NEG
& 0x1));
21798 assert(!(extData
.NEG
& 0x2));
21799 assert(!(extData
.NEG
& 0x4));
21801 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
21802 if (wf
->execMask(lane
)) {
21803 sdst
.setBit(lane
, src0
[lane
] != src1
[lane
] ? 1 : 0);
21807 wf
->execMask() = sdst
.rawData();
21811 Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32(
21813 : Inst_VOP3(iFmt
, "v_cmpx_ge_u32", true)
21816 } // Inst_VOP3__V_CMPX_GE_U32
21818 Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32()
21820 } // ~Inst_VOP3__V_CMPX_GE_U32
21822 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21824 Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst
)
21826 Wavefront
*wf
= gpuDynInst
->wavefront();
21827 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
21828 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
21829 ScalarOperandU64
sdst(gpuDynInst
, instData
.VDST
);
21835 * input modifiers are supported by FP operations only
21837 assert(!(instData
.ABS
& 0x1));
21838 assert(!(instData
.ABS
& 0x2));
21839 assert(!(instData
.ABS
& 0x4));
21840 assert(!(extData
.NEG
& 0x1));
21841 assert(!(extData
.NEG
& 0x2));
21842 assert(!(extData
.NEG
& 0x4));
21844 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
21845 if (wf
->execMask(lane
)) {
21846 sdst
.setBit(lane
, src0
[lane
] >= src1
[lane
] ? 1 : 0);
21850 wf
->execMask() = sdst
.rawData();
    Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u32", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U32

    Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32()
    {
    } // ~Inst_VOP3__V_CMPX_T_U32

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_I64

    Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64()
    {
    } // ~Inst_VOP3__V_CMP_F_I64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
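    /**
     * Commentary added during cleanup: unlike the CMPX forms above, the
     * plain V_CMP instructions only write the per-lane result bits to
     * the scalar destination named by VDST; the EXEC mask is left
     * untouched, so their execute() bodies end with sdst.write() alone.
     */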
    Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_I64

    Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64()
    {
    } // ~Inst_VOP3__V_CMP_LT_I64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_I64

    Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_I64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_I64

    Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64()
    {
    } // ~Inst_VOP3__V_CMP_LE_I64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_I64

    Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64()
    {
    } // ~Inst_VOP3__V_CMP_GT_I64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_I64

    Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64()
    {
    } // ~Inst_VOP3__V_CMP_NE_I64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_I64

    Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64()
    {
    } // ~Inst_VOP3__V_CMP_GE_I64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_I64

    Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64()
    {
    } // ~Inst_VOP3__V_CMP_T_I64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_f_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_F_U64

    Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64()
    {
    } // ~Inst_VOP3__V_CMP_F_U64

    // D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_lt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LT_U64

    Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64()
    {
    } // ~Inst_VOP3__V_CMP_LT_U64

    // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_eq_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_EQ_U64

    Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64()
    {
    } // ~Inst_VOP3__V_CMP_EQ_U64

    // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_le_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_LE_U64

    Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64()
    {
    } // ~Inst_VOP3__V_CMP_LE_U64

    // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_gt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GT_U64

    Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64()
    {
    } // ~Inst_VOP3__V_CMP_GT_U64

    // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ne_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_NE_U64

    Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64()
    {
    } // ~Inst_VOP3__V_CMP_NE_U64

    // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute

    Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_ge_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_GE_U64

    Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64()
    {
    } // ~Inst_VOP3__V_CMP_GE_U64

    // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmp_t_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMP_T_U64

    Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64()
    {
    } // ~Inst_VOP3__V_CMP_T_U64

    // D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_I64

    Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64()
    {
    } // ~Inst_VOP3__V_CMPX_F_I64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_I64

    Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64()
    {
    } // ~Inst_VOP3__V_CMPX_LT_I64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_I64

    Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_I64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_I64

    Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_LE_I64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_I64

    Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64()
    {
    } // ~Inst_VOP3__V_CMPX_GT_I64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_I64

    Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_NE_I64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_I64

    Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64()
    {
    } // ~Inst_VOP3__V_CMPX_GE_I64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_i64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_I64

    Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64()
    {
    } // ~Inst_VOP3__V_CMPX_T_I64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_f_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_F_U64

    Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64()
    {
    } // ~Inst_VOP3__V_CMPX_F_U64

    // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_lt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LT_U64

    Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64()
    {
    } // ~Inst_VOP3__V_CMPX_LT_U64

    // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_eq_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_EQ_U64

    Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64()
    {
    } // ~Inst_VOP3__V_CMPX_EQ_U64

    // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_le_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_LE_U64

    Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_LE_U64

    // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_gt_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GT_U64

    Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64()
    {
    } // ~Inst_VOP3__V_CMPX_GT_U64

    // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ne_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_NE_U64

    Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_NE_U64

    // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute

    Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_ge_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_GE_U64

    Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64()
    {
    } // ~Inst_VOP3__V_CMPX_GE_U64

    // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cmpx_t_u64", true)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CMPX_T_U64

    Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64()
    {
    } // ~Inst_VOP3__V_CMPX_T_U64

    // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
    void
    Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ScalarOperandU64 sdst(gpuDynInst, instData.VDST);

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                sdst.setBit(lane, 1);
            }
        }

        wf->execMask() = sdst.rawData();
        sdst.write();
    } // execute
    Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cndmask_b32", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_CNDMASK_B32

    Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32()
    {
    } // ~Inst_VOP3__V_CNDMASK_B32

    // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
    // as a scalar GPR in S2.
    void
    Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(vcc.rawData(), lane)
                    ? src1[lane] : src0[lane];
            }
        }

        vdst.write();
    } // execute
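    /**
     * Commentary added during cleanup: in the VOP3 encoding the select
     * mask is not implicitly VCC; SRC2 names an arbitrary SGPR pair that
     * is read as a 64-bit mask. Each lane picks src1 when its mask bit
     * is set, e.g. with mask 0b...10: vdst[0] = src0[0] (bit 0 clear)
     * and vdst[1] = src1[1] (bit 1 set).
     */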
    Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_ADD_F32

    Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32()
    {
    } // ~Inst_VOP3__V_ADD_F32

    // D.f = S0.f + S1.f.
    void
    Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute
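    /**
     * Commentary added during cleanup: the ABS/NEG handling above is the
     * common VOP3 input-modifier pattern for FP operations. Bit n of
     * instData.ABS applies |x| to source n, then bit n of extData.NEG
     * negates it, so ABS = 0x1 and NEG = 0x1 makes this instruction
     * compute -|src0| + src1 per lane.
     */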
    Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SUB_F32

    Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32()
    {
    } // ~Inst_VOP3__V_SUB_F32

    // D.f = S0.f - S1.f.
    void
    Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_SUBREV_F32

    Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32()
    {
    } // ~Inst_VOP3__V_SUBREV_F32

    // D.f = S1.f - S0.f.
    void
    Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_LEGACY_F32

    Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_MUL_LEGACY_F32

    // D.f = S0.f * S1.f
    void
    Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
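    /**
     * Commentary added during cleanup: the branch ladder above resolves
     * the zero-times-infinity cases explicitly (to NAN) and derives the
     * sign of zero/infinity results from the operand signs before
     * falling through to an ordinary multiply, e.g.:
     *
     *     +0.0f * -5.0f     -> -0.0f
     *     -INFINITY * -2.0f -> +INFINITY
     *     +0.0f * INFINITY  -> NAN
     */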
    Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MUL_F32

    Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
    {
    } // ~Inst_VOP3__V_MUL_F32

    // D.f = S0.f * S1.f.
    void
    Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_i32_i24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_I32_I24

    Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24()
    {
    } // ~Inst_VOP3__V_MUL_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0].
    void
    Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0));
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_i32_i24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_I32_I24

    Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24()
    {
    } // ~Inst_VOP3__V_MUL_HI_I32_I24

    // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
    void
    Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 tmp_src0
                    = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
                VecElemI64 tmp_src1
                    = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));

                vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
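    /**
     * Commentary added during cleanup: both I24 multiplies truncate each
     * operand to bits [23:0] and sign-extend with sext<24>. For example,
     * src = 0x00800001 truncates to 0x800001, which sign-extends to
     * -8388607; the products are formed from these 24-bit signed values,
     * and the HI variant keeps bits [63:32] of the 64-bit product.
     */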
    Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_u32_u24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_U32_U24

    Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24()
    {
    } // ~Inst_VOP3__V_MUL_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0].
    void
    Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_u32_u24", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_U32_U24

    Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24()
    {
    } // ~Inst_VOP3__V_MUL_HI_U32_U24

    // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
    void
    Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
                VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
                vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MIN_F32

    Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32()
    {
    } // ~Inst_VOP3__V_MIN_F32

    // D.f = (S0.f < S1.f ? S0.f : S1.f).
    void
    Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MAX_F32

    Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32()
    {
    } // ~Inst_VOP3__V_MAX_F32

    // D.f = (S0.f >= S1.f ? S0.f : S1.f).
    void
    Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
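    /**
     * Commentary added during cleanup: std::fmin/std::fmax return the
     * non-NaN operand when exactly one input is NaN (IEEE 754
     * minNum/maxNum semantics), e.g. std::fmax(NAN, 1.0f) == 1.0f, so a
     * single quiet NaN does not propagate through these min/max ops.
     */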
    Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_I32

    Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32()
    {
    } // ~Inst_VOP3__V_MIN_I32

    // D.i = min(S0.i, S1.i).
    void
    Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_I32

    Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32()
    {
    } // ~Inst_VOP3__V_MAX_I32

    // D.i = max(S0.i, S1.i).
    void
    Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_U32

    Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32()
    {
    } // ~Inst_VOP3__V_MIN_U32

    // D.u = min(S0.u, S1.u).
    void
    Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_U32

    Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32()
    {
    } // ~Inst_VOP3__V_MAX_U32

    // D.u = max(S0.u, S1.u).
    void
    Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
23978 Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3
*iFmt
)
23979 : Inst_VOP3(iFmt
, "v_lshrrev_b32", false)
23982 } // Inst_VOP3__V_LSHRREV_B32
23984 Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32()
23986 } // ~Inst_VOP3__V_LSHRREV_B32
23988 // D.u = S1.u >> S0.u[4:0].
23989 // The vacated bits are set to zero.
23991 Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst
)
23993 Wavefront
*wf
= gpuDynInst
->wavefront();
23994 ConstVecOperandU32
src1(gpuDynInst
, extData
.SRC1
);
23995 ConstVecOperandU32
src0(gpuDynInst
, extData
.SRC0
);
23996 VecOperandU32
vdst(gpuDynInst
, instData
.VDST
);
24002 * input modifiers are supported by FP operations only
24004 assert(!(instData
.ABS
& 0x1));
24005 assert(!(instData
.ABS
& 0x2));
24006 assert(!(instData
.ABS
& 0x4));
24007 assert(!(extData
.NEG
& 0x1));
24008 assert(!(extData
.NEG
& 0x2));
24009 assert(!(extData
.NEG
& 0x4));
24011 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
24012 if (wf
->execMask(lane
)) {
24013 vdst
[lane
] = src1
[lane
] >> bits(src0
[lane
], 4, 0);
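
    // Note on the "rev" encoding above: the shift amount comes from the
    // first source operand and the value being shifted from the second,
    // e.g. src0 = 4 and src1 = 0xF0 makes each active lane compute
    // 0xF0 >> 4 == 0xF.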
    Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I32

    Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32()
    {
    } // ~Inst_VOP3__V_ASHRREV_I32

    // D.i = signext(S1.i) >> S0.i[4:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
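
    // Unlike v_lshrrev_b32, the arithmetic shift replicates the sign bit:
    // src0 = 4 with src1 = -32 (0xFFFFFFE0) yields -2 (0xFFFFFFFE), where
    // a logical shift would have produced 0x0FFFFFFE.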
    Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B32

    Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32()
    {
    } // ~Inst_VOP3__V_LSHLREV_B32

    // D.u = S1.u << S0.u[4:0].
    void
    Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_and_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_AND_B32

    Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32()
    {
    } // ~Inst_VOP3__V_AND_B32

    // D.u = S0.u & S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] & src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_or_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_OR_B32

    Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32()
    {
    } // ~Inst_VOP3__V_OR_B32

    // D.u = S0.u | S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] | src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_xor_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_XOR_B32

    Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32()
    {
    } // ~Inst_VOP3__V_XOR_B32

    // D.u = S0.u ^ S1.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] ^ src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mac_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAC);
    } // Inst_VOP3__V_MAC_F32

    Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32()
    {
    } // ~Inst_VOP3__V_MAC_F32

    // D.f = S0.f * S1.f + D.f.
    void
    Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vdst.read();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
            }
        }

        vdst.write();
    } // execute
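
    // std::fma() evaluates src0 * src1 + vdst with a single rounding step;
    // note that vdst is read as a third source above before being
    // overwritten, which is what makes the accumulate form work.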
    Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_add_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_ADD_U32

    Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
    {
    } // ~Inst_VOP3__V_ADD_U32

    // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
                vcc.setBit(lane, ((ScalarRegU64)src0[lane]
                    + (ScalarRegU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
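
    // Worked example of the carry-out test above: with src0 = 0xFFFFFFFF
    // and src1 = 0x1 the widened sum is 0x100000000, so vdst wraps to 0
    // and the lane's VCC bit is set; the 64-bit casts keep the
    // intermediate sum from wrapping before the comparison.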
    Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_sub_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUB_U32

    Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
    {
    } // ~Inst_VOP3__V_SUB_U32

    // D.u = S0.u - S1.u;
    // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
                vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute

    Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
    } // Inst_VOP3__V_SUBREV_U32

    Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
    {
    } // ~Inst_VOP3__V_SUBREV_U32

    // D.u = S1.u - S0.u;
    // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
    // carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
    void
    Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
                vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
            }
        }

        vdst.write();
        vcc.write();
    } // execute
    Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_addc_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_ADDC_U32

    Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
    {
    } // ~Inst_VOP3__V_ADDC_U32

    // D.u = S0.u + S1.u + VCC[threadId];
    // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
    // is an UNSIGNED overflow.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane]
                    + bits(vcc.rawData(), lane);
                sdst.setBit(lane, ((VecElemU64)src0[lane]
                    + (VecElemU64)src1[lane]
                        + (VecElemU64)bits(vcc.rawData(), lane))
                            >= 0x100000000ULL ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
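
    // Usage note: v_add_u32 followed by v_addc_u32 forms a 64-bit add.
    // The low-word add leaves a per-lane carry in VCC, which this
    // instruction folds into the high-word sum via
    // bits(vcc.rawData(), lane).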
    Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subb_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_SUBB_U32

    Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
    {
    } // ~Inst_VOP3__V_SUBB_U32

    // D.u = S0.u - S1.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane]
                    - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
          InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_subbrev_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(ReadsVCC);
    } // Inst_VOP3__V_SUBBREV_U32

    Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
    {
    } // ~Inst_VOP3__V_SUBBREV_U32

    // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
    // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
    // source comes from the SGPR-pair at S2.u.
    void
    Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        vcc.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane]
                    - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
                    > src0[lane] ? 1 : 0);
            }
        }

        vdst.write();
        sdst.write();
    } // execute
    Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_ADD_F16

    Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16()
    {
    } // ~Inst_VOP3__V_ADD_F16

    // D.f16 = S0.f16 + S1.f16.
    void
    Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SUB_F16

    Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16()
    {
    } // ~Inst_VOP3__V_SUB_F16

    // D.f16 = S0.f16 - S1.f16.
    void
    Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SUBREV_F16

    Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16()
    {
    } // ~Inst_VOP3__V_SUBREV_F16

    // D.f16 = S1.f16 - S0.f16.
    void
    Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MUL_F16

    Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
    {
    } // ~Inst_VOP3__V_MUL_F16

    // D.f16 = S0.f16 * S1.f16.
    void
    Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mac_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAC);
    } // Inst_VOP3__V_MAC_F16

    Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
    {
    } // ~Inst_VOP3__V_MAC_F16

    // D.f16 = S0.f16 * S1.f16 + D.f16.
    void
    Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ADD_U16

    Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16()
    {
    } // ~Inst_VOP3__V_ADD_U16

    // D.u16 = S0.u16 + S1.u16.
    void
    Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] + src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sub_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUB_U16

    Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16()
    {
    } // ~Inst_VOP3__V_SUB_U16

    // D.u16 = S0.u16 - S1.u16.
    void
    Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] - src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_subrev_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SUBREV_U16

    Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
    {
    } // ~Inst_VOP3__V_SUBREV_U16

    // D.u16 = S1.u16 - S0.u16.
    void
    Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] - src0[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_lo_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_LO_U16

    Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
    {
    } // ~Inst_VOP3__V_MUL_LO_U16

    // D.u16 = S0.u16 * S1.u16.
    void
    Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B16

    Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
    {
    } // ~Inst_VOP3__V_LSHLREV_B16

    // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
    void
    Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
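
    // The 16-bit shifts take the distance from src0[3:0], so only shift
    // amounts 0-15 are expressible; e.g. src0 = 0x12 shifts by 2, not 18.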
    Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshrrev_b16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B16

    Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
    {
    } // ~Inst_VOP3__V_LSHRREV_B16

    // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I16

    Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
    {
    } // ~Inst_VOP3__V_ASHRREV_I16

    // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MAX_F16

    Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
    {
    } // ~Inst_VOP3__V_MAX_F16

    // D.f16 = max(S0.f16, S1.f16).
    void
    Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_MIN_F16

    Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
    {
    } // ~Inst_VOP3__V_MIN_F16

    // D.f16 = min(S0.f16, S1.f16).
    void
    Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_U16

    Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
    {
    } // ~Inst_VOP3__V_MAX_U16

    // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX_I16

    Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
    {
    } // ~Inst_VOP3__V_MAX_I16

    // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::max(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_U16

    Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
    {
    } // ~Inst_VOP3__V_MIN_U16

    // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
    void
    Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_i16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN_I16

    Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
    {
    } // ~Inst_VOP3__V_MIN_I16

    // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
    void
    Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::min(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LDEXP_F16

    Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
    {
    } // ~Inst_VOP3__V_LDEXP_F16

    // D.f16 = S0.f16 * (2 ** S1.i16).
    void
    Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_nop", false)
    {
        setFlag(Nop);
        setFlag(ALU);
    } // Inst_VOP3__V_NOP

    Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
    {
    } // ~Inst_VOP3__V_NOP

    // Do nothing.
    void
    Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute
    Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mov_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_B32

    Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
    {
    } // ~Inst_VOP3__V_MOV_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_I32_F64

    Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64()
    {
    } // ~Inst_VOP3__V_CVT_I32_F64

    // D.i = (int)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
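
    // std::frexp() decomposes src[lane] as m * 2^exp with 0.5 <= |m| < 1,
    // so the exp > 30 test flags any magnitude of at least 2^30 and
    // saturates it to INT_MIN/INT_MAX along with the infinities.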
    Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_i32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_I32

    Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32()
    {
    } // ~Inst_VOP3__V_CVT_F64_I32

    // D.d = (double)S0.i.
    void
    Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_i32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_I32

    Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32()
    {
    } // ~Inst_VOP3__V_CVT_F32_I32

    // D.f = (float)S0.i.
    void
    Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        VecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_u32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_U32

    Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32()
    {
    } // ~Inst_VOP3__V_CVT_F32_U32

    // D.f = (float)S0.u.
    void
    Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_U32_F32

    Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32()
    {
    } // ~Inst_VOP3__V_CVT_U32_F32

    // D.u = (unsigned)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_I32_F32

    Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_I32_F32

    // D.i = (int)S0.f.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 30) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = INT_MIN;
                    } else {
                        vdst[lane] = INT_MAX;
                    }
                } else {
                    vdst[lane] = (VecElemI32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mov_fed_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MOV_FED_B32

    Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
    {
    } // ~Inst_VOP3__V_MOV_FED_B32

    // D.u = S0.u.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_F32

    Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_F16_F32

    // D.f16 = flt32_to_flt16(S0.f).
    void
    Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F32_F16

    Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
    {
    } // ~Inst_VOP3__V_CVT_F32_F16

    // D.f = flt16_to_flt32(S0.f16).
    void
    Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_rpi_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_RPI_I32_F32

    Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_RPI_I32_F32

    // D.i = (int)floor(S0.f + 0.5).
    void
    Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_flr_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_FLR_I32_F32

    Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
    {
    } // ~Inst_VOP3__V_CVT_FLR_I32_F32

    // D.i = (int)floor(S0.f).
    void
    Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemI32)std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_off_f32_i4", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_OFF_F32_I4

    Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
    {
    } // ~Inst_VOP3__V_CVT_OFF_F32_I4

    // 4-bit signed int to 32-bit float.
    void
    Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F32_F64

    Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
    {
    } // ~Inst_VOP3__V_CVT_F32_F64

    // D.f = (float)S0.d.
    void
    Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_f32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_F32

    Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
    {
    } // ~Inst_VOP3__V_CVT_F64_F32

    // D.d = (double)S0.f.
    void
    Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte0", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE0

    Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE0

    // D.f = (float)(S0.u[7:0]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
            }
        }

        vdst.write();
    } // execute
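
    // Example: src = 0xAABBCCDD converts byte 0 (0xDD) to 221.0f; the
    // ubyte1/2/3 variants below select the progressively higher bytes.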
    Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte1", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE1

    Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE1

    // D.f = (float)(S0.u[15:8]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte2", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE2

    Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE2

    // D.f = (float)(S0.u[23:16]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f32_ubyte3", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_F32_UBYTE3

    Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3()
    {
    } // ~Inst_VOP3__V_CVT_F32_UBYTE3

    // D.f = (float)(S0.u[31:24]).
    void
    Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_U32_F64

    Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64()
    {
    } // ~Inst_VOP3__V_CVT_U32_F64

    // D.u = (unsigned)S0.d.
    // Out-of-range floating point values (including infinity) saturate. NaN
    // is converted to 0.
    void
    Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 31) {
                    vdst[lane] = UINT_MAX;
                } else {
                    vdst[lane] = (VecElemU32)src[lane];
                }
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f64_u32", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CVT_F64_U32

    Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32()
    {
    } // ~Inst_VOP3__V_CVT_F64_U32

    // D.d = (double)S0.u.
    void
    Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (VecElemF64)src[lane];
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRUNC_F64

    Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64()
    {
    } // ~Inst_VOP3__V_TRUNC_F64

    // D.d = trunc(S0.d), return integer part of S0.d.
    void
    Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_CEIL_F64

    Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64()
    {
    } // ~Inst_VOP3__V_CEIL_F64

    // D.d = ceil(S0.d);
    void
    Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RNDNE_F64

    Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64()
    {
    } // ~Inst_VOP3__V_RNDNE_F64

    // D.d = round_nearest_even(S0.d).
    void
    Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FLOOR_F64

    Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64()
    {
    } // ~Inst_VOP3__V_FLOOR_F64

    // D.d = floor(S0.d);
    void
    Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FRACT_F32

    Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32()
    {
    } // ~Inst_VOP3__V_FRACT_F32

    // D.f = modf(S0.f).
    void
    Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_TRUNC_F32

    Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32()
    {
    } // ~Inst_VOP3__V_TRUNC_F32

    // D.f = trunc(S0.f), return integer part of S0.f.
    void
    Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::trunc(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CEIL_F32

    Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32()
    {
    } // ~Inst_VOP3__V_CEIL_F32

    // D.f = ceil(S0.f);
    void
    Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ceil(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RNDNE_F32

    Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32()
    {
    } // ~Inst_VOP3__V_RNDNE_F32

    // D.f = round_nearest_even(S0.f).
    void
    Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = roundNearestEven(src[lane]);
            }
        }

        vdst.write();
    } // execute
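
    // Round-to-nearest-even breaks ties toward the even integer, e.g.
    // roundNearestEven(2.5f) gives 2.0f while roundNearestEven(3.5f)
    // gives 4.0f.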
    Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FLOOR_F32

    Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32()
    {
    } // ~Inst_VOP3__V_FLOOR_F32

    // D.f = floor(S0.f);
    void
    Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::floor(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_EXP_F32

    Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32()
    {
    } // ~Inst_VOP3__V_EXP_F32

    // D.f = pow(2.0, S0.f).
    void
    Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LOG_F32

    Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32()
    {
    } // ~Inst_VOP3__V_LOG_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RCP_F32

    Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32()
    {
    } // ~Inst_VOP3__V_RCP_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_iflag_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RCP_IFLAG_F32

    Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32()
    {
    } // ~Inst_VOP3__V_RCP_IFLAG_F32

    // D.f = 1.0 / S0.f.
    void
    Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_RSQ_F32

    Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32()
    {
    } // ~Inst_VOP3__V_RSQ_F32

    // D.f = 1.0 / sqrt(S0.f).
    void
    Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = 1.0 / std::sqrt(src[lane]);
            }
        }

        vdst.write();
    } // execute
    Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RCP_F64

    Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64()
    {
    } // ~Inst_VOP3__V_RCP_F64

    // D.d = 1.0 / S0.d.
    void
    Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = 0.0;
                    }
                } else {
                    vdst[lane] = 1.0 / src[lane];
                }
            }
        }

        vdst.write();
    } // execute
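
    // The special cases above short-circuit the division: a zero source
    // produces +infinity, NaN propagates, and each infinity maps to a
    // correspondingly signed zero (-0.0 for -infinity, 0.0 for +infinity).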
    Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_RSQ_F64

    Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64()
    {
    } // ~Inst_VOP3__V_RSQ_F64

    // D.d = 1.0 / sqrt(S0.d).
    void
    Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::fpclassify(src[lane]) == FP_ZERO) {
                    vdst[lane] = +INFINITY;
                } else if (std::isnan(src[lane])) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src[lane])
                           && !std::signbit(src[lane])) {
                    vdst[lane] = 0.0;
                } else if (std::signbit(src[lane])) {
                    vdst[lane] = NAN;
                } else {
                    vdst[lane] = 1.0 / std::sqrt(src[lane]);
                }
            }
        }

        vdst.write();
    } // execute
26660 Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3
*iFmt
)
26661 : Inst_VOP3(iFmt
, "v_sqrt_f32", false)
26665 } // Inst_VOP3__V_SQRT_F32
26667 Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32()
26669 } // ~Inst_VOP3__V_SQRT_F32
26671 // D.f = sqrt(S0.f).
26673 Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst
)
26675 Wavefront
*wf
= gpuDynInst
->wavefront();
26676 ConstVecOperandF32
src(gpuDynInst
, extData
.SRC0
);
26677 VecOperandF32
vdst(gpuDynInst
, instData
.VDST
);
26681 if (instData
.ABS
& 0x1) {
26685 if (extData
.NEG
& 0x1) {
26689 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
26690 if (wf
->execMask(lane
)) {
26691 vdst
[lane
] = std::sqrt(src
[lane
]);
26698 Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3
*iFmt
)
26699 : Inst_VOP3(iFmt
, "v_sqrt_f64", false)
26703 } // Inst_VOP3__V_SQRT_F64
26705 Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64()
26707 } // ~Inst_VOP3__V_SQRT_F64
26709 // D.d = sqrt(S0.d).
26711 Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst
)
26713 Wavefront
*wf
= gpuDynInst
->wavefront();
26714 ConstVecOperandF64
src(gpuDynInst
, extData
.SRC0
);
26715 VecOperandF64
vdst(gpuDynInst
, instData
.VDST
);
26719 if (instData
.ABS
& 0x1) {
26723 if (extData
.NEG
& 0x1) {
26727 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
26728 if (wf
->execMask(lane
)) {
26729 vdst
[lane
] = std::sqrt(src
[lane
]);
26736 Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3
*iFmt
)
26737 : Inst_VOP3(iFmt
, "v_sin_f32", false)
26741 } // Inst_VOP3__V_SIN_F32
26743 Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32()
26745 } // ~Inst_VOP3__V_SIN_F32
26747 // D.f = sin(S0.f * 2 * PI).
26749 Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst
)
26751 Wavefront
*wf
= gpuDynInst
->wavefront();
26752 ConstVecOperandF32
src(gpuDynInst
, extData
.SRC0
);
26753 ConstScalarOperandF32
pi(gpuDynInst
, REG_PI
);
26754 VecOperandF32
vdst(gpuDynInst
, instData
.VDST
);
26759 if (instData
.ABS
& 0x1) {
26763 if (extData
.NEG
& 0x1) {
26767 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
26768 if (wf
->execMask(lane
)) {
26769 vdst
[lane
] = std::sin(src
[lane
] * 2 * pi
.rawData());
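
    // The 2 * PI factor means S0 is interpreted in rotations rather than
    // radians (1.0 is one full turn). A standalone illustration (function
    // name is ours, not simulator code):
    //
    //     #include <cmath>
    //     float sinTurns(float turns)
    //     {
    //         return std::sin(turns * 2.0f * static_cast<float>(M_PI));
    //     }
    //
    //     // sinTurns(0.25f) is approximately 1.0f (a quarter turn)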

    Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cos_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_COS_F32

    Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32()
    {
    } // ~Inst_VOP3__V_COS_F32

    // D.f = cos(S0.f * 2 * PI).
    void
    Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();
        pi.read();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_not_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_NOT_B32

    Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32()
    {
    } // ~Inst_VOP3__V_NOT_B32

    // D.u = ~S0.u.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ~src[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfrev_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFREV_B32

    Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32()
    {
    } // ~Inst_VOP3__V_BFREV_B32

    // D.u[31:0] = S0.u[0:31], bitfield reverse.
    // Input and output modifiers not supported.
    void
    Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = reverseBits(src[lane]);
            }
        }

        vdst.write();
    } // execute
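
    // reverseBits() is a gem5 helper. For illustration only, a
    // self-contained 32-bit bit reversal can be written with the classic
    // swap-in-halves trick (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t reverse32(uint32_t v)
    //     {
    //         v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
    //         v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
    //         v = ((v >> 4) & 0x0f0f0f0fu) | ((v & 0x0f0f0f0fu) << 4);
    //         v = ((v >> 8) & 0x00ff00ffu) | ((v & 0x00ff00ffu) << 8);
    //         return (v >> 16) | (v << 16);
    //     }
    //
    //     // reverse32(0x00000001u) == 0x80000000u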

    Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbh_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_U32

    Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32()
    {
    } // ~Inst_VOP3__V_FFBH_U32

    // D.u = position of first 1 in S0.u from MSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOneMsb(src[lane]);
            }
        }

        vdst.write();
    } // execute
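
    // findFirstOneMsb() counts bit positions from the MSB, so the result is
    // the number of leading zeros, with the all-zeros input mapped to
    // 0xffffffff. An illustrative standalone version (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t ffbhU32(uint32_t v)
    //     {
    //         if (!v) return 0xffffffffu;
    //         uint32_t pos = 0;
    //         while (!(v & 0x80000000u)) { // shift until the MSB is set
    //             v <<= 1;
    //             ++pos;
    //         }
    //         return pos; // e.g. ffbhU32(1) == 31, ffbhU32(1u << 31) == 0
    //     }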

    Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbl_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBL_B32

    Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32()
    {
    } // ~Inst_VOP3__V_FFBL_B32

    // D.u = position of first 1 in S0.u from LSB;
    // D.u = 0xffffffff if S0.u == 0.
    void
    Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = findFirstOne(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ffbh_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_FFBH_I32

    Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32()
    {
    } // ~Inst_VOP3__V_FFBH_I32

    // D.u = position of first bit different from sign bit in S0.i from MSB;
    // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
    void
    Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = firstOppositeSignBit(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i32_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_EXP_I32_F64

    Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I32_F64

    // See V_FREXP_EXP_I32_F32.
    void
    Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FREXP_MANT_F64

    Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F64

    void
    Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 exp(0);
                vdst[lane] = std::frexp(src[lane], &exp);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_FRACT_F64

    Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64()
    {
    } // ~Inst_VOP3__V_FRACT_F64

    void
    Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF64 int_part(0.0);
                vdst[lane] = std::modf(src[lane], &int_part);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i32_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FREXP_EXP_I32_F32

    Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I32_F32

    // frexp(S0.f, Exponent(S0.f))
    // if (S0.f == INF || S0.f == NAN) then D.i = 0;
    // else D.i = Exponent(S0.f)
    void
    Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else {
                    VecElemI32 exp(0);
                    std::frexp(src[lane], &exp);
                    vdst[lane] = exp;
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_FREXP_MANT_F32

    Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F32

    // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
    // else D.f = Mantissa(S0.f).
    void
    Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isinf(src[lane]) || std::isnan(src[lane])) {
                    vdst[lane] = src[lane];
                } else {
                    VecElemI32 exp(0);
                    vdst[lane] = std::frexp(src[lane], &exp);
                }
            }
        }

        vdst.write();
    } // execute
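
    // std::frexp decomposes x into mantissa * 2^exp with the mantissa in
    // [0.5, 1.0), which is exactly the (mantissa, exponent) pair the two
    // FREXP opcodes expose. A short plain-C++ illustration:
    //
    //     #include <cassert>
    //     #include <cmath>
    //     void frexpDemo()
    //     {
    //         int exp = 0;
    //         double mant = std::frexp(8.0, &exp);
    //         assert(mant == 0.5 && exp == 4); // 8.0 == 0.5 * 2^4
    //     }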

    Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_clrexcp", false)
    {
    } // Inst_VOP3__V_CLREXCP

    Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP()
    {
    } // ~Inst_VOP3__V_CLREXCP

    void
    Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_u16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_U16

    Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16()
    {
    } // ~Inst_VOP3__V_CVT_F16_U16

    // D.f16 = uint16_to_flt16(S.u16).
    void
    Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_f16_i16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_F16_I16

    Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16()
    {
    } // ~Inst_VOP3__V_CVT_F16_I16

    // D.f16 = int16_to_flt16(S.i16).
    void
    Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_u16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_U16_F16

    Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16()
    {
    } // ~Inst_VOP3__V_CVT_U16_F16

    // D.u16 = flt16_to_uint16(S.f16).
    void
    Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CVT_I16_F16

    Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16()
    {
    } // ~Inst_VOP3__V_CVT_I16_F16

    // D.i16 = flt16_to_int16(S.f16).
    void
    Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rcp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RCP_F16

    Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16()
    {
    } // ~Inst_VOP3__V_RCP_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / S0.f16.
    void
    Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sqrt_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SQRT_F16

    Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16()
    {
    } // ~Inst_VOP3__V_SQRT_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = sqrt(S0.f16).
    void
    Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rsq_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RSQ_F16

    Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16()
    {
    } // ~Inst_VOP3__V_RSQ_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = 1 / sqrt(S0.f16).
    void
    Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_LOG_F16

    Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
    {
    } // ~Inst_VOP3__V_LOG_F16

    // if (S0.f16 == 1.0f)
    //     D.f16 = 0.0f;
    // else
    //     D.f16 = log2(S0.f16).
    void
    Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_EXP_F16

    Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
    {
    } // ~Inst_VOP3__V_EXP_F16

    // if (S0.f16 == 0.0f)
    //     D.f16 = 1.0f;
    // else
    //     D.f16 = pow(2.0, S0.f16).
    void
    Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_mant_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_MANT_F16

    Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
    {
    } // ~Inst_VOP3__V_FREXP_MANT_F16

    // if (S0.f16 == +-INF || S0.f16 == NAN)
    //     D.f16 = S0.f16;
    // else
    //     D.f16 = mantissa(S0.f16).
    void
    Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
          InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_frexp_exp_i16_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FREXP_EXP_I16_F16

    Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
    {
    } // ~Inst_VOP3__V_FREXP_EXP_I16_F16

    void
    Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_floor_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FLOOR_F16

    Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
    {
    } // ~Inst_VOP3__V_FLOOR_F16

    // D.f16 = floor(S0.f16);
    void
    Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ceil_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_CEIL_F16

    Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
    {
    } // ~Inst_VOP3__V_CEIL_F16

    // D.f16 = ceil(S0.f16);
    void
    Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_trunc_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_TRUNC_F16

    Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
    {
    } // ~Inst_VOP3__V_TRUNC_F16

    // D.f16 = trunc(S0.f16).
    void
    Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_rndne_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_RNDNE_F16

    Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
    {
    } // ~Inst_VOP3__V_RNDNE_F16

    // D.f16 = roundNearestEven(S0.f16);
    void
    Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fract_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_FRACT_F16

    Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
    {
    } // ~Inst_VOP3__V_FRACT_F16

    // D.f16 = S0.f16 + -floor(S0.f16).
    void
    Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sin_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_SIN_F16

    Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
    {
    } // ~Inst_VOP3__V_SIN_F16

    // D.f16 = sin(S0.f16 * 2 * PI).
    void
    Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cos_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_COS_F16

    Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
    {
    } // ~Inst_VOP3__V_COS_F16

    // D.f16 = cos(S0.f16 * 2 * PI).
    void
    Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_exp_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_EXP_LEGACY_F32

    Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_EXP_LEGACY_F32

    // D.f = pow(2.0, S0.f)
    void
    Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        if (instData.ABS & 0x1) {
            src.absModifier();
        }

        if (extData.NEG & 0x1) {
            src.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::pow(2.0, src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_log_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LOG_LEGACY_F32

    Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_LOG_LEGACY_F32

    // D.f = log2(S0.f).
    void
    Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::log2(src[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_legacy_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_LEGACY_F32

    Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32()
    {
    } // ~Inst_VOP3__V_MAD_LEGACY_F32

    // D.f = S0.f * S1.f + S2.f
    void
    Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_F32

    Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32()
    {
    } // ~Inst_VOP3__V_MAD_F32

    // D.f = S0.f * S1.f + S2.f.
    void
    Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_i32_i24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I32_I24

    Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
    {
    } // ~Inst_VOP3__V_MAD_I32_I24

    // D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
    void
    Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
                    * sext<24>(bits(src1[lane], 23, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute
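
    // sext<24>() replicates bit 23 into the upper bits so the 24-bit fields
    // multiply as signed values. An illustrative standalone equivalent
    // (name is ours, not the gem5 helper):
    //
    //     #include <cstdint>
    //     int32_t sext24(uint32_t v)
    //     {
    //         v &= 0x00ffffffu; // keep S.i[23:0]
    //         return (v & 0x00800000u) ? int32_t(v | 0xff000000u)
    //                                  : int32_t(v);
    //     }
    //
    //     // sext24(0x00ffffffu) == -1, so (-1) * (-1) + s2 == 1 + s2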

    Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u32_u24", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U32_U24

    Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
    {
    } // ~Inst_VOP3__V_MAD_U32_U24

    // D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
    void
    Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
                    + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubeid_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEID_F32

    Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
    {
    } // ~Inst_VOP3__V_CUBEID_F32

    void
    Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubesc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBESC_F32

    Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
    {
    } // ~Inst_VOP3__V_CUBESC_F32

    void
    Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubetc_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBETC_F32

    Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
    {
    } // ~Inst_VOP3__V_CUBETC_F32

    void
    Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cubema_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CUBEMA_F32

    Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
    {
    } // ~Inst_VOP3__V_CUBEMA_F32

    void
    Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_U32

    Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
    {
    } // ~Inst_VOP3__V_BFE_U32

    // D.u = (S0.u >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    } // execute
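
    // Worked example of the extract above: with S0 = 0xdeadbeef, offset
    // S1 = 8 and width S2 = 4, the result is (0xdeadbeef >> 8) & 0xf == 0xe.
    // Illustrative standalone form (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t bfeU32(uint32_t s0, uint32_t s1, uint32_t s2)
    //     {
    //         uint32_t offset = s1 & 0x1f; // S1.u[4:0]
    //         uint32_t width = s2 & 0x1f;  // S2.u[4:0]
    //         return (s0 >> offset) & ((1u << width) - 1u);
    //     }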

    Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfe_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFE_I32

    Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
    {
    } // ~Inst_VOP3__V_BFE_I32

    // D.i = (S0.i >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
    // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
    void
    Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
                    & ((1 << bits(src2[lane], 4, 0)) - 1);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfi_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFI_B32

    Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
    {
    } // ~Inst_VOP3__V_BFI_B32

    // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
    void
    Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
                    & src2[lane]);
            }
        }

        vdst.write();
    } // execute
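
    // The insert selects S1's bits wherever the mask S0 has a 1 and keeps
    // S2's bits elsewhere. Illustrative sketch (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t bfiB32(uint32_t mask, uint32_t ins, uint32_t base)
    //     {
    //         return (mask & ins) | (~mask & base);
    //     }
    //
    //     // bfiB32(0x0000ff00u, 0xaaaaaaaau, 0x11111111u) == 0x1111aa11u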

    Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F32

    Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
    {
    } // ~Inst_VOP3__V_FMA_F32

    // D.f = S0.f * S1.f + S2.f.
    void
    Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F64

    Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
    {
    } // ~Inst_VOP3__V_FMA_F64

    // D.d = S0.d * S1.d + S2.d.
    void
    Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lerp_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LERP_U8

    Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
    {
    } // ~Inst_VOP3__V_LERP_U8

    // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24
    // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
    // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
    // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
    void
    Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ((bits(src0[lane], 31, 24)
                    + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
                    << 24;
                vdst[lane] += ((bits(src0[lane], 23, 16)
                    + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
                    << 16;
                vdst[lane] += ((bits(src0[lane], 15, 8)
                    + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
                    << 8;
                vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
                    + bits(src2[lane], 0)) >> 1);
            }
        }

        vdst.write();
    } // execute
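
    // Each result byte is the average of the corresponding S0 and S1 bytes,
    // rounded by one bit taken from S2. Single-byte sketch (name is ours):
    //
    //     #include <cstdint>
    //     uint8_t lerpByte(uint8_t a, uint8_t b, uint8_t round)
    //     {
    //         return uint8_t((unsigned(a) + unsigned(b) + (round & 1u)) >> 1);
    //     }
    //
    //     // lerpByte(10, 15, 1) == 13; lerpByte(10, 15, 0) == 12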

    Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbit_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBIT_B32

    Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBIT_B32

    // D.u = ({S0, S1} >> S2.u[4:0]) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
            }
        }

        vdst.write();
    } // execute
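
    // {S0, S1} is treated as a 64-bit value with S0 in the upper dword;
    // shifting right by S2[4:0] and truncating to 32 bits gives a funnel
    // shift. Illustrative sketch (name is ours):
    //
    //     #include <cstdint>
    //     uint32_t alignbit(uint32_t hi, uint32_t lo, uint32_t shift)
    //     {
    //         uint64_t cat = (uint64_t(hi) << 32) | lo;
    //         return uint32_t(cat >> (shift & 0x1f));
    //     }
    //
    //     // alignbit(0x00000001u, 0x00000000u, 4) == 0x10000000u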

    Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_alignbyte_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ALIGNBYTE_B32

    Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
    {
    } // ~Inst_VOP3__V_ALIGNBYTE_B32

    // D.u = ({S0, S1} >> (8 * S2.u[4:0])) & 0xffffffff.
    void
    Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
                    | (VecElemU64)src1[lane]);
                vdst[lane] = (VecElemU32)((src_0_1
                    >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0)))
                    & 0xffffffff);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MIN3_F32

    Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
    {
    } // ~Inst_VOP3__V_MIN3_F32

    // D.f = min(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
                vdst[lane] = std::fmin(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_I32

    Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
    {
    } // ~Inst_VOP3__V_MIN3_I32

    // D.i = min(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MIN3_U32

    Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
    {
    } // ~Inst_VOP3__V_MIN3_U32

    // D.u = min(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
                vdst[lane] = std::min(min_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MAX3_F32

    Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
    {
    } // ~Inst_VOP3__V_MAX3_F32

    // D.f = max(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
                vdst[lane] = std::fmax(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX3_I32

    Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32()
    {
    } // ~Inst_VOP3__V_MAX3_I32

    // D.i = max(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
                vdst[lane] = std::max(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MAX3_U32

    Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32()
    {
    } // ~Inst_VOP3__V_MAX3_U32

    // D.u = max(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
                vdst[lane] = std::max(max_0_1, src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_MED3_F32

    Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32()
    {
    } // ~Inst_VOP3__V_MED3_F32

    // D.f = median(S0.f, S1.f, S2.f).
    void
    Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute
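
    // median() is the helper used by all three MED3 variants; the median of
    // three values can also be written with two mins and two maxes.
    // Illustrative sketch (name is ours):
    //
    //     #include <algorithm>
    //     float med3(float a, float b, float c)
    //     {
    //         return std::max(std::min(a, b),
    //                         std::min(std::max(a, b), c));
    //     }
    //
    //     // med3(3.0f, 1.0f, 2.0f) == 2.0f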

    Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_I32

    Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
    {
    } // ~Inst_VOP3__V_MED3_I32

    // D.i = median(S0.i, S1.i, S2.i).
    void
    Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_med3_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MED3_U32

    Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
    {
    } // ~Inst_VOP3__V_MED3_U32

    // D.u = median(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U8

    Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8()
    {
    } // ~Inst_VOP3__V_SAD_U8

    // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) +
    // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u.
    // Sum of absolute differences with accumulation, overflow into upper bits
    // is allowed.
    void
    Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 24)
                    - bits(src1[lane], 31, 24))
                    + std::abs(bits(src0[lane], 23, 16)
                    - bits(src1[lane], 23, 16))
                    + std::abs(bits(src0[lane], 15, 8)
                    - bits(src1[lane], 15, 8))
                    + std::abs(bits(src0[lane], 7, 0)
                    - bits(src1[lane], 7, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute
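
    // The SAD accumulates four byte-wise absolute differences into S2.
    // Illustrative standalone form (name is ours):
    //
    //     #include <cstdint>
    //     #include <cstdlib>
    //     uint32_t sadU8(uint32_t s0, uint32_t s1, uint32_t acc)
    //     {
    //         for (int byte = 0; byte < 4; ++byte) {
    //             int a = (s0 >> (8 * byte)) & 0xff;
    //             int b = (s1 >> (8 * byte)) & 0xff;
    //             acc += std::abs(a - b);
    //         }
    //         return acc;
    //     }
    //
    //     // sadU8(0x01020304u, 0x04030201u, 0) == 3 + 1 + 1 + 3 == 8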

    Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_hi_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_HI_U8

    Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
    {
    } // ~Inst_VOP3__V_SAD_HI_U8

    // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
    // Sum of absolute differences with accumulation, overflow is lost.
    void
    Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (((bits(src0[lane], 31, 24)
                    - bits(src1[lane], 31, 24)) + (bits(src0[lane], 23, 16)
                    - bits(src1[lane], 23, 16)) + (bits(src0[lane], 15, 8)
                    - bits(src1[lane], 15, 8)) + (bits(src0[lane], 7, 0)
                    - bits(src1[lane], 7, 0))) << 16) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u16", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U16

    Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16()
    {
    } // ~Inst_VOP3__V_SAD_U16

    // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0])
    //     + S2.u.
    // Word SAD with accumulation.
    void
    Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(bits(src0[lane], 31, 16)
                    - bits(src1[lane], 31, 16))
                    + std::abs(bits(src0[lane], 15, 0)
                    - bits(src1[lane], 15, 0)) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_sad_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_SAD_U32

    Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
    {
    } // ~Inst_VOP3__V_SAD_U32

    // D.u = abs(S0.i - S1.i) + S2.u.
    // Dword SAD with accumulation.
    void
    Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PK_U8_F32

    Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U8_F32

    // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0]))
    // | (S2.u & ~(0xff << (8 * S1.u[1:0]))).
    // Convert floating point value S0 to 8-bit unsigned integer and pack the
    // result into byte S1 of dword S2.
    void
    Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
                    << (8 * bits(src1[lane], 1, 0)))
                    | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
            }
        }

        vdst.write();
    } // execute
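
    // The float is converted to a byte and dropped into byte lane S1[1:0] of
    // S2. A sketch of the packing step (name is ours; the raw cast mirrors
    // the conversion used above, without the hardware's clamping rules):
    //
    //     #include <cstdint>
    //     uint32_t packByte(float f, uint32_t sel, uint32_t base)
    //     {
    //         uint32_t shift = 8u * (sel & 3u);
    //         uint32_t byte = (uint32_t(uint8_t(f)) & 0xffu) << shift;
    //         return byte | (base & ~(0xffu << shift));
    //     }
    //
    //     // packByte(200.0f, 1, 0) == 0x0000c800u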
28999 Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3
*iFmt
)
29000 : Inst_VOP3(iFmt
, "v_div_fixup_f32", false)
29004 } // Inst_VOP3__V_DIV_FIXUP_F32
29006 Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
29008 } // ~Inst_VOP3__V_DIV_FIXUP_F32
29010 // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
29011 // s2.f = Numerator.
29013 Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst
)
29015 Wavefront
*wf
= gpuDynInst
->wavefront();
29016 ConstVecOperandF32
src0(gpuDynInst
, extData
.SRC0
);
29017 ConstVecOperandF32
src1(gpuDynInst
, extData
.SRC1
);
29018 ConstVecOperandF32
src2(gpuDynInst
, extData
.SRC2
);
29019 VecOperandF32
vdst(gpuDynInst
, instData
.VDST
);
29025 if (instData
.ABS
& 0x1) {
29026 src0
.absModifier();
29029 if (instData
.ABS
& 0x2) {
29030 src1
.absModifier();
29033 if (instData
.ABS
& 0x4) {
29034 src2
.absModifier();
29037 if (extData
.NEG
& 0x1) {
29038 src0
.negModifier();
29041 if (extData
.NEG
& 0x2) {
29042 src1
.negModifier();
29045 if (extData
.NEG
& 0x4) {
29046 src2
.negModifier();
29049 for (int lane
= 0; lane
< NumVecElemPerVecReg
; ++lane
) {
29050 if (wf
->execMask(lane
)) {
29051 if (std::fpclassify(src1
[lane
]) == FP_ZERO
) {
29052 if (std::signbit(src1
[lane
])) {
29053 vdst
[lane
] = -INFINITY
;
29055 vdst
[lane
] = +INFINITY
;
29057 } else if (std::isnan(src2
[lane
]) || std::isnan(src1
[lane
])) {
29059 } else if (std::isinf(src1
[lane
])) {
29060 if (std::signbit(src1
[lane
])) {
29061 vdst
[lane
] = -INFINITY
;
29063 vdst
[lane
] = +INFINITY
;
29066 vdst
[lane
] = src2
[lane
] / src1
[lane
];
    // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---

    Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_FIXUP_F64

    Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F64

    // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
    // s2.d = Numerator.
    void
    Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int sign_out = std::signbit(src1[lane])
                    ^ std::signbit(src2[lane]);
                int exp1;
                int exp2;
                std::frexp(src1[lane], &exp1);
                std::frexp(src2[lane], &exp2);

                if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
                    vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
                } else if (std::fpclassify(src1[lane]) == FP_ZERO
                           && std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane]
                        = std::numeric_limits<VecElemF64>::signaling_NaN();
                } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
                    vdst[lane]
                        = std::numeric_limits<VecElemF64>::signaling_NaN();
                } else if (std::fpclassify(src1[lane]) == FP_ZERO
                           || std::isinf(src2[lane])) {
                    vdst[lane] = sign_out ? -INFINITY : +INFINITY;
                } else if (std::isinf(src1[lane])
                           || std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane] = sign_out ? -0.0 : +0.0;
                } else if (exp2 - exp1 < -1075) {
                    vdst[lane] = src0[lane];
                } else if (exp1 == 2047) {
                    vdst[lane] = src0[lane];
                } else {
                    vdst[lane] = sign_out ? -std::fabs(src0[lane])
                        : std::fabs(src0[lane]);
                }
            }
        }

        vdst.write();
    } // execute
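
    // The two magic thresholds above are likely derived from the IEEE-754
    // double encoding: an exponent difference below -1075 (bias 1023 plus 52
    // mantissa bits) means the true quotient would underflow even the
    // subnormal range, and exp1 == 2047 is the all-ones biased exponent of
    // the denominator (an inf/NaN encoding); in both cases the unscaled
    // quotient is passed through untouched.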

    Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(F32);
    } // Inst_VOP3__V_DIV_SCALE_F32

    Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32()
    {
    } // ~Inst_VOP3__V_DIV_SCALE_F32

    // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f =
    // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a
    // numerator and denominator, this opcode will appropriately scale inputs
    // for division to avoid subnormal terms during Newton-Raphson correction
    // algorithm. This opcode produces a VCC flag for post-scale of quotient.
    void
    Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane];
                vcc.setBit(lane, 0);
            }
        }

        vcc.write();
        vdst.write();
    } // execute
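
    // Note that this F32 model keeps the quotient unscaled and always clears
    // the VCC bit, so the post-scale in v_div_fmas degenerates to a plain
    // FMA; this looks like a deliberate simplification of the hardware
    // pre-scaling described in the comment above.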

    // --- Inst_VOP3__V_DIV_SCALE_F64 class methods ---

    Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f64")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(F64);
    } // Inst_VOP3__V_DIV_SCALE_F64

    Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64()
    {
    } // ~Inst_VOP3__V_DIV_SCALE_F64

    // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d =
    // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a
    // numerator and denominator, this opcode will appropriately scale inputs
    // for division to avoid subnormal terms during Newton-Raphson correction
    // algorithm. This opcode produces a VCC flag for post-scale of quotient.
    void
    Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                int exp1;
                int exp2;
                std::frexp(src1[lane], &exp1);
                std::frexp(src2[lane], &exp2);
                vcc.setBit(lane, 0);

                if (std::fpclassify(src1[lane]) == FP_ZERO
                    || std::fpclassify(src2[lane]) == FP_ZERO) {
                    vdst[lane] = NAN;
                } else if (exp2 - exp1 >= 768) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src1[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
                    vdst[lane] = std::ldexp(src0[lane], 128);
                } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
                           && std::fpclassify(src2[lane] / src1[lane])
                           == FP_SUBNORMAL) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src1[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (std::fpclassify(1.0 / src1[lane])
                           == FP_SUBNORMAL) {
                    vdst[lane] = std::ldexp(src0[lane], -128);
                } else if (std::fpclassify(src2[lane] / src1[lane])
                           == FP_SUBNORMAL) {
                    vcc.setBit(lane, 1);
                    if (src0[lane] == src2[lane]) {
                        vdst[lane] = std::ldexp(src0[lane], 128);
                    }
                } else if (exp2 <= 53) {
                    vdst[lane] = std::ldexp(src0[lane], 128);
                }
            }
        }

        vcc.write();
        vdst.write();
    } // execute
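
    // The 2^128 scale (or 2^-128 for an over-large reciprocal) presumably
    // re-centers the operands so the Newton-Raphson refinement in the
    // macro-expanded divide never sees subnormal intermediates; e.g. a
    // denominator near DBL_MIN is scaled up into the normal range, and the
    // matching VCC bit tells v_div_fmas to apply the compensating
    // post-scale.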

    Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fmas_f32", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
        setFlag(F32);
        setFlag(FMA);
    } // Inst_VOP3__V_DIV_FMAS_F32

    Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32()
    {
    } // ~Inst_VOP3__V_DIV_FMAS_F32

    // D.f = Special case divide FMA with scale and flags(s0.f = Quotient,
    // s1.f = Denominator, s2.f = Numerator)
    void
    Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
            }
        }

        vdst.write();
    } // execute

    // --- Inst_VOP3__V_DIV_FMAS_F64 class methods ---

    Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fmas_f64", false)
    {
        setFlag(ALU);
        setFlag(ReadsVCC);
        setFlag(F64);
        setFlag(FMA);
    } // Inst_VOP3__V_DIV_FMAS_F64

    Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
    {
    } // ~Inst_VOP3__V_DIV_FMAS_F64

    // D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
    // s1.d = Denominator, s2.d = Numerator)
    void
    Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);
        ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();
        vcc.read();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (instData.ABS & 0x4) {
            src2.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        if (extData.NEG & 0x4) {
            src2.negModifier();
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (bits(vcc.rawData(), lane)) {
                    vdst[lane] = std::pow(2, 64)
                        * std::fma(src0[lane], src1[lane], src2[lane]);
                } else {
                    vdst[lane] = std::fma(src0[lane], src1[lane],
                                          src2[lane]);
                }
            }
        }

        vdst.write();
    } // execute
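
    // When the VCC bit set by v_div_scale marks a scaled lane, the FMA
    // result is multiplied back up by 2^64 here; std::ldexp(fma, 64) would
    // avoid the std::pow(2, 64) round trip, but the result is identical for
    // this exactly representable power of two.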

    Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_msad_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MSAD_U8

    Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
    {
    } // ~Inst_VOP3__V_MSAD_U8

    // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
    void
    Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_qsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_QSAD_PK_U16_U8

    Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_QSAD_PK_U16_U8

    // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    void
    Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_pk_u16_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_PK_U16_U8

    Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_PK_U16_U8

    // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[63:0])
    void
    Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mqsad_u32_u8", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MQSAD_U32_U8

    Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
    {
    } // ~Inst_VOP3__V_MQSAD_U32_U8

    // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
    // S1.u[31:0], S2.u[127:0])
    void
    Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_u64_u32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U64_U32

    Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
    {
    } // ~Inst_VOP3__V_MAD_U64_U32

    // {vcc_out, D.u64} = S0.u32 * S1.u32 + S2.u64.
    void
    Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    } // execute
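
    // muladd() (from inst_util.hh) is assumed here to compute
    // vdst[lane] = src0[lane] * src1[lane] + src2[lane] in 64 bits and to
    // return the unsigned carry-out, which becomes the per-lane vcc_out bit.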

    Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
        InFmt_VOP3_SDST_ENC *iFmt)
        : Inst_VOP3_SDST_ENC(iFmt, "v_mad_i64_i32")
    {
        setFlag(ALU);
        setFlag(WritesVCC);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I64_I32

    Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
    {
    } // ~Inst_VOP3__V_MAD_I64_I32

    // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
    void
    Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI64 src2(gpuDynInst, extData.SRC2);
        ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
        VecOperandI64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
                    src2[lane]));
            }
        }

        vcc.write();
        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_F16

    Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16()
    {
    } // ~Inst_VOP3__V_MAD_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Supports round mode, exception flags, saturation.
    void
    Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_u16", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_U16

    Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16()
    {
    } // ~Inst_VOP3__V_MAD_U16

    // D.u16 = S0.u16 * S1.u16 + S2.u16.
    // Supports saturation (unsigned 16-bit integer domain).
    void
    Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU16 src2(gpuDynInst, extData.SRC2);
        VecOperandU16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane] + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mad_i16", false)
    {
        setFlag(ALU);
        setFlag(MAD);
    } // Inst_VOP3__V_MAD_I16

    Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16()
    {
    } // ~Inst_VOP3__V_MAD_I16

    // D.i16 = S0.i16 * S1.i16 + S2.i16.
    // Supports saturation (signed 16-bit integer domain).
    void
    Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandI16 src2(gpuDynInst, extData.SRC2);
        VecOperandI16 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src0[lane] * src1[lane] + src2[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_perm_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_PERM_B32

    Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32()
    {
    } // ~Inst_VOP3__V_PERM_B32

    // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]);
    // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]);
    // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]);
    // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]);
    // byte permute(byte in[8], byte sel) {
    //     if (sel >= 13) then return 0xff;
    //     elsif (sel == 12) then return 0x00;
    //     elsif (sel == 11) then return in[7][7] * 0xff;
    //     elsif (sel == 10) then return in[5][7] * 0xff;
    //     elsif (sel == 9) then return in[3][7] * 0xff;
    //     elsif (sel == 8) then return in[1][7] * 0xff;
    //     else return in[sel];
    // }
    void
    Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();
        src2.readSrc();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemU64 selector = (VecElemU64)src0[lane];
                selector = (selector << 32) | (VecElemU64)src1[lane];
                vdst[lane] = 0;

                DPRINTF(GCN3, "Executing v_perm_b32 src_0 0x%08x, src_1 "
                    "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
                    src1[lane], src2[lane], vdst[lane]);
                DPRINTF(GCN3, "Selector: 0x%08x \n", selector);

                for (int i = 0; i < 4; ++i) {
                    VecElemU32 permuted_val = permute(selector, 0xFF
                        & ((VecElemU32)src2[lane] >> (8 * i)));
                    vdst[lane] |= (permuted_val << (8 * i));
                }

                DPRINTF(GCN3, "v_perm result: 0x%08x\n", vdst[lane]);
            }
        }

        vdst.write();
    } // execute
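
    // Worked example (illustrative values, not from the original source):
    // with the 64-bit selector {S0,S1}, byte 0 is the LSB of S1 and byte 7
    // the MSB of S0, so S2 = 0x03020100 reproduces S1, S2 = 0x07060504
    // reproduces S0, and S2 = 0x0c0c0c0c yields 0x00000000.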

    Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_fma_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
        setFlag(FMA);
    } // Inst_VOP3__V_FMA_F16

    Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
    {
    } // ~Inst_VOP3__V_FMA_F16

    // D.f16 = S0.f16 * S1.f16 + S2.f16.
    // Fused half precision multiply add.
    void
    Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_div_fixup_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_DIV_FIXUP_F16

    Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
    {
    } // ~Inst_VOP3__V_DIV_FIXUP_F16

    // sign_out = sign(S1.f16)^sign(S2.f16);
    // if (S2.f16 == NAN)
    //     D.f16 = Quiet(S2.f16);
    // else if (S1.f16 == NAN)
    //     D.f16 = Quiet(S1.f16);
    // else if (S1.f16 == S2.f16 == 0)
    //     # 0/0
    //     D.f16 = pele_nan(0xfe00);
    // else if (abs(S1.f16) == abs(S2.f16) == +-INF)
    //     # inf/inf
    //     D.f16 = pele_nan(0xfe00);
    // else if (S1.f16 == 0 || abs(S2.f16) == +-INF)
    //     # x/0, or inf/y
    //     D.f16 = sign_out ? -INF : INF;
    // else if (abs(S1.f16) == +-INF || S2.f16 == 0)
    //     # x/inf, 0/y
    //     D.f16 = sign_out ? -0 : 0;
    // else if ((exp(S2.f16) - exp(S1.f16)) < -150)
    //     D.f16 = sign_out ? -underflow : underflow;
    // else if (exp(S1.f16) == 255)
    //     D.f16 = sign_out ? -overflow : overflow;
    // else
    //     D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
    // Half precision division fixup.
    // S0 = Quotient, S1 = Denominator, S2 = Numerator.
    // Given a numerator, denominator, and quotient from a divide, this opcode
    // will detect and apply special case numerics, touching up the quotient
    // if necessary. This opcode also generates invalid, denorm and divide by
    // zero exceptions caused by the division.
    void
    Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pkaccum_u8_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKACCUM_U8_F32

    Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32

    // byte = S1.u[1:0]; bit = byte * 8;
    // D.u[bit + 7:bit] = flt32_to_uint8(S0.f);
    // Pack converted value of S0.f into byte S1 of the destination.
    // SQ translates to V_CVT_PK_U8_F32.
    // Note: this opcode uses src_c to pass destination in as a source.
    void
    Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P1_F32

    Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P1_F32

    // D.f = P10 * S.f + P0;
    void
    Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p2_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_P2_F32

    Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F32

    // D.f = P20 * S.f + D.f;
    void
    Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_interp_mov_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_INTERP_MOV_F32

    Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
    {
    } // ~Inst_VOP3__V_INTERP_MOV_F32

    // D.f = {P10,P20,P0}[S.u]; parameter load.
    void
    Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1ll_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LL_F16

    Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LL_F16

    // D.f32 = P10.f16 * S0.f32 + P0.f16.
    void
    Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p1lv_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P1LV_F16

    Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P1LV_F16

    void
    Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_interp_p2_f16", false)
    {
        setFlag(ALU);
        setFlag(F16);
    } // Inst_VOP3__V_INTERP_P2_F16

    Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
    {
    } // ~Inst_VOP3__V_INTERP_P2_F16

    // D.f16 = P20.f16 * S0.f32 + S2.f32.
    void
    Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_add_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_ADD_F64

    Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
    {
    } // ~Inst_VOP3__V_ADD_F64

    // D.d = S0.d + S1.d.
    void
    Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane]) ) {
                    vdst[lane] = NAN;
                } else if (std::isinf(src0[lane]) &&
                           std::isinf(src1[lane])) {
                    if (std::signbit(src0[lane]) !=
                        std::signbit(src1[lane])) {
                        vdst[lane] = NAN;
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else if (std::isinf(src0[lane])) {
                    vdst[lane] = src0[lane];
                } else if (std::isinf(src1[lane])) {
                    vdst[lane] = src1[lane];
                } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src1[lane];
                    }
                } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src1[lane]) == FP_ZERO) {
                    if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src0[lane]) == FP_ZERO) {
                        if (std::signbit(src0[lane]) &&
                            std::signbit(src1[lane])) {
                            vdst[lane] = -0.0;
                        } else {
                            vdst[lane] = 0.0;
                        }
                    } else {
                        vdst[lane] = src0[lane];
                    }
                } else {
                    vdst[lane] = src0[lane] + src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
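
    // Most of the branches above exist to flush subnormal inputs to zero
    // (with the sign-of-zero rule for -0.0 + -0.0), not to handle NaN/inf,
    // which the host's native addition would already propagate correctly;
    // GCN3 F64 adds appear to be modeled here as denormal-flushing.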

    Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MUL_F64

    Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
    {
    } // ~Inst_VOP3__V_MUL_F64

    // D.d = S0.d * S1.d.
    void
    Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) ||
                    std::isnan(src1[lane])) {
                    vdst[lane] = NAN;
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           !std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
                           std::fpclassify(src0[lane]) == FP_ZERO) &&
                           std::signbit(src0[lane])) {
                    if (std::isinf(src1[lane])) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +0.0;
                    } else {
                        vdst[lane] = -0.0;
                    }
                } else if (std::isinf(src0[lane]) &&
                           !std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (!std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else if (std::isinf(src0[lane]) &&
                           std::signbit(src0[lane])) {
                    if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
                        std::fpclassify(src1[lane]) == FP_ZERO) {
                        vdst[lane] = NAN;
                    } else if (std::signbit(src1[lane])) {
                        vdst[lane] = +INFINITY;
                    } else {
                        vdst[lane] = -INFINITY;
                    }
                } else {
                    vdst[lane] = src0[lane] * src1[lane];
                }
            }
        }

        vdst.write();
    } // execute
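
    // Same flush-to-zero idea as v_add_f64: a zero or subnormal factor times
    // a finite value yields a signed zero, and times an infinity yields NaN,
    // matching the IEEE 0 * inf = NaN rule applied after the flush.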

    Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_min_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MIN_F64

    Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
    {
    } // ~Inst_VOP3__V_MIN_F64

    // D.d = min(S0.d, S1.d).
    void
    Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmin(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_max_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_MAX_F64

    Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
    {
    } // ~Inst_VOP3__V_MAX_F64

    // D.d = max(S0.d, S1.d).
    void
    Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (instData.ABS & 0x2) {
            src1.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        if (extData.NEG & 0x2) {
            src1.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::fmax(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_LDEXP_F64

    Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
    {
    } // ~Inst_VOP3__V_LDEXP_F64

    // D.d = S0.d * 2^S1.i[31:0] (ldexp).
    void
    Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandF64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        if (instData.ABS & 0x1) {
            src0.absModifier();
        }

        if (extData.NEG & 0x1) {
            src0.negModifier();
        }

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                if (std::isnan(src0[lane]) || std::isinf(src0[lane])) {
                    vdst[lane] = src0[lane];
                } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
                           || std::fpclassify(src0[lane]) == FP_ZERO) {
                    if (std::signbit(src0[lane])) {
                        vdst[lane] = -0.0;
                    } else {
                        vdst[lane] = +0.0;
                    }
                } else {
                    vdst[lane] = std::ldexp(src0[lane], src1[lane]);
                }
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_lo_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_LO_U32

    Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32()
    {
    } // ~Inst_VOP3__V_MUL_LO_U32

    // D.u = S0.u * S1.u.
    void
    Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_U32

    Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
    {
    } // ~Inst_VOP3__V_MUL_HI_U32

    // D.u = (S0.u * S1.u) >> 32.
    void
    Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mul_hi_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MUL_HI_I32

    Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
    {
    } // ~Inst_VOP3__V_MUL_HI_I32

    // D.i = (S0.i * S1.i) >> 32.
    void
    Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandI32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                VecElemI64 s0 = (VecElemI64)src0[lane];
                VecElemI64 s1 = (VecElemI64)src1[lane];
                vdst[lane]
                    = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ldexp_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_LDEXP_F32

    Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
    {
    } // ~Inst_VOP3__V_LDEXP_F32

    // D.f = S0.f * 2^S1.i (ldexp)
    void
    Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = std::ldexp(src0[lane], src1[lane]);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_readlane_b32", true)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_READLANE_B32

    Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
    {
    } // ~Inst_VOP3__V_READLANE_B32

    // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
    // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
    // Input and output modifiers not supported; this is an untyped operation.
    void
    Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        ScalarOperandU32 sdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        sdst = src0[src1.rawData() & 0x3f];

        sdst.write();
    } // execute
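
    // The & 0x3f presumably clamps the lane select to the 64 lanes of a
    // GCN3 wavefront, e.g. a lane-select value of 69 reads lane 5.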

    Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_writelane_b32", false)
    {
        setFlag(ALU);
        setFlag(IgnoreExec);
    } // Inst_VOP3__V_WRITELANE_B32

    Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
    {
    } // ~Inst_VOP3__V_WRITELANE_B32

    // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
    // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
    // exec mask. Input and output modifiers not supported; this is an untyped
    // operation.
    void
    Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.read();
        src1.read();
        vdst.read();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        vdst[src1.rawData() & 0x3f] = src0.rawData();

        vdst.write();
    } // execute

    Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bcnt_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BCNT_U32_B32

    Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
    {
    } // ~Inst_VOP3__V_BCNT_U32_B32

    // D.u = CountOneBits(S0.u) + S1.u. Bit count.
    void
    Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = popCount(src0[lane]) + src1[lane];
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mbcnt_lo_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_LO_U32_B32

    Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_LO_U32_B32

    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                threadMask = ((1LL << lane) - 1LL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
                             src1[lane];
            }
        }

        vdst.write();
    } // execute

    // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---

    Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_mbcnt_hi_u32_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_MBCNT_HI_U32_B32

    Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32()
    {
    } // ~Inst_VOP3__V_MBCNT_HI_U32_B32

    // ThreadMask = (1 << ThreadPosition) - 1;
    // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u.
    // Masked bit count, ThreadPosition is the position of this thread in the
    // wavefront (in 0..63).
    void
    Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);
        uint64_t threadMask = 0;

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                threadMask = ((1LL << lane) - 1LL);
                vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
                             src1[lane];
            }
        }

        vdst.write();
    } // execute
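
    // A minimal sketch of the usual pairing (register choice illustrative):
    // issue the LO op with S1 = 0 and feed its result into the HI op, so
    //     lo = popCount(mask[31:0] & threadMask[31:0])
    //     hi = popCount(mask[63:32] & threadMask[63:32]) + lo
    // gives each lane the count of lower-numbered set bits; with mask = EXEC
    // this is how a wave-relative lane index is typically computed.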

    // --- Inst_VOP3__V_LSHLREV_B64 class methods ---

    Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshlrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHLREV_B64

    Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64()
    {
    } // ~Inst_VOP3__V_LSHLREV_B64

    // D.u64 = S1.u64 << S0.u[5:0].
    void
    Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_lshrrev_b64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_LSHRREV_B64

    Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
    {
    } // ~Inst_VOP3__V_LSHRREV_B64

    // D.u64 = S1.u64 >> S0.u[5:0].
    // The vacated bits are set to zero.
    void
    Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_ashrrev_i64", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_ASHRREV_I64

    Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
    {
    } // ~Inst_VOP3__V_ASHRREV_I64

    // D.u64 = signext(S1.u64) >> S0.u[5:0].
    // The vacated bits are set to the sign bit of the input value.
    void
    Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
        VecOperandU64 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane]
                    = src1[lane] >> bits(src0[lane], 5, 0);
            }
        }

        vdst.write();
    } // execute

    Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_trig_preop_f64", false)
    {
        setFlag(ALU);
        setFlag(F64);
    } // Inst_VOP3__V_TRIG_PREOP_F64

    Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
    {
    } // ~Inst_VOP3__V_TRIG_PREOP_F64

    void
    Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_bfm_b32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_BFM_B32

    Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
    {
    } // ~Inst_VOP3__V_BFM_B32

    // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0];
    void
    Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
        VecOperandU32 vdst(gpuDynInst, instData.VDST);

        src0.readSrc();
        src1.readSrc();

        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
                    << bits(src1[lane], 4, 0);
            }
        }

        vdst.write();
    } // execute
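
    // Example: S0 = 8, S1 = 4 gives ((1 << 8) - 1) << 4 = 0x00000ff0, i.e.
    // an 8-bit field mask positioned at bit 4.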

    Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pknorm_i16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_I16_F32

    Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32

    // D = {(snorm)S1.f, (snorm)S0.f}.
    void
    Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pknorm_u16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKNORM_U16_F32

    Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32

    // D = {(unorm)S1.f, (unorm)S0.f}.
    void
    Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
        InFmt_VOP3 *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pkrtz_f16_f32", false)
    {
        setFlag(ALU);
        setFlag(F32);
    } // Inst_VOP3__V_CVT_PKRTZ_F16_F32

    Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
    {
    } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32

    void
    Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_u16_u32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_U16_U32

    Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
    {
    } // ~Inst_VOP3__V_CVT_PK_U16_U32

    // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
    void
    Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3
        *iFmt)
        : Inst_VOP3(iFmt, "v_cvt_pk_i16_i32", false)
    {
        setFlag(ALU);
    } // Inst_VOP3__V_CVT_PK_I16_I32

    Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
    {
    } // ~Inst_VOP3__V_CVT_PK_I16_I32

    // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
    void
    Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_u32")
    {
    } // Inst_DS__DS_ADD_U32

    Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
    {
    } // ~Inst_DS__DS_ADD_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_u32")
    {
    } // Inst_DS__DS_SUB_U32

    Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
    {
    } // ~Inst_DS__DS_SUB_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_u32")
    {
    } // Inst_DS__DS_RSUB_U32

    Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
    {
    } // ~Inst_DS__DS_RSUB_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_u32")
    {
    } // Inst_DS__DS_INC_U32

    Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
    {
    } // ~Inst_DS__DS_INC_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_u32")
    {
    } // Inst_DS__DS_DEC_U32

    Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
    {
    } // ~Inst_DS__DS_DEC_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_i32")
    {
    } // Inst_DS__DS_MIN_I32

    Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
    {
    } // ~Inst_DS__DS_MIN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_i32")
    {
    } // Inst_DS__DS_MAX_I32

    Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
    {
    } // ~Inst_DS__DS_MAX_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_u32")
    {
    } // Inst_DS__DS_MIN_U32

    Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
    {
    } // ~Inst_DS__DS_MIN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_u32")
    {
    } // Inst_DS__DS_MAX_U32

    Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
    {
    } // ~Inst_DS__DS_MAX_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_b32")
    {
    } // Inst_DS__DS_AND_B32

    Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
    {
    } // ~Inst_DS__DS_AND_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_b32")
    {
    } // Inst_DS__DS_OR_B32

    Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
    {
    } // ~Inst_DS__DS_OR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_b32")
    {
    } // Inst_DS__DS_XOR_B32

    Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
    {
    } // ~Inst_DS__DS_XOR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_b32")
    {
    } // Inst_DS__DS_MSKOR_B32

    Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
    {
    } // ~Inst_DS__DS_MSKOR_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B32

    Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
    {
    } // ~Inst_DS__DS_WRITE_B32

    // MEM[ADDR] = DATA.
    // Write dword.
    void
    Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
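
    // OFFSET0/OFFSET1 concatenate into a 16-bit unsigned byte offset,
    // offset = (OFFSET1 << 8) | OFFSET0, which initMemWrite() applies on top
    // of the per-lane LDS addresses computed by calcAddr() in execute().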

    Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2_B32

    Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
    {
    } // ~Inst_DS__DS_WRITE2_B32

    // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
    // Write 2 dwords.
    void
    Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4;
        Addr offset1 = instData.OFFSET1 * 4;

        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
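
    // For the write2 forms the two offsets are dword-scaled (OFFSET * 4
    // bytes), and d_data interleaves the pair per lane -- data0 at index
    // lane * 2 and data1 at lane * 2 + 1 -- which initDualMemWrite() is
    // assumed to unpack into the two LDS locations.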

    Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2st64_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2ST64_B32

    Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
    {
    } // ~Inst_DS__DS_WRITE2ST64_B32

    // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2;
    // Write 2 dwords.
    void
    Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4 * 64;
        Addr offset1 = instData.OFFSET1 * 4 * 64;

        initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
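
    // The st64 variant scales the same offsets by a further 64 dwords,
    // presumably to support strided layouts where the two elements sit
    // 64 dwords apart.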

    // --- Inst_DS__DS_CMPST_B32 class methods ---

    Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_b32")
    {
    } // Inst_DS__DS_CMPST_B32

    Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
    {
    } // ~Inst_DS__DS_CMPST_B32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_F32

    Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32()
    {
    } // ~Inst_DS__DS_CMPST_F32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_F32

    Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32()
    {
    } // ~Inst_DS__DS_MIN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_F32

    Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32()
    {
    } // ~Inst_DS__DS_MAX_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_nop")
    {
        setFlag(Nop);
    } // Inst_DS__DS_NOP

    Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
    {
    } // ~Inst_DS__DS_NOP

    // Do nothing.
    void
    Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
    {
    } // execute

    Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_F32

    Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
    {
    } // ~Inst_DS__DS_ADD_F32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b8")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B8

    Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
    {
    } // ~Inst_DS__DS_WRITE_B8

    // MEM[ADDR] = DATA[7:0].
    void
    Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU8 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU8>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
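    /**
     * Worked example of the offset composition used by single-address DS
     * ops such as ds_write_b8: the two 8-bit fields form one 16-bit byte
     * offset, (OFFSET1 << 8) | OFFSET0. For instance, OFFSET1 = 0x1 and
     * OFFSET0 = 0x20 yield byte offset 0x120, i.e. 288 bytes past each
     * lane's base address.
     */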
    // --- Inst_DS__DS_WRITE_B16 class methods ---

    Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b16")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B16

    Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16()
    {
    } // ~Inst_DS__DS_WRITE_B16

    // MEM[ADDR] = DATA[15:0].
    void
    Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU16 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU16>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    // --- Inst_DS__DS_ADD_RTN_U32 class methods ---

    Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_u32")
    {
    } // Inst_DS__DS_ADD_RTN_U32

    Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
    {
    } // ~Inst_DS__DS_ADD_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_rtn_u32")
    {
    } // Inst_DS__DS_SUB_RTN_U32

    Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32()
    {
    } // ~Inst_DS__DS_SUB_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u32")
    {
    } // Inst_DS__DS_RSUB_RTN_U32

    Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u32")
    {
    } // Inst_DS__DS_INC_RTN_U32

    Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
    {
    } // ~Inst_DS__DS_INC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u32")
    {
    } // Inst_DS__DS_DEC_RTN_U32

    Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
    {
    } // ~Inst_DS__DS_DEC_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i32")
    {
    } // Inst_DS__DS_MIN_RTN_I32

    Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
    {
    } // ~Inst_DS__DS_MIN_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i32")
    {
    } // Inst_DS__DS_MAX_RTN_I32

    Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
    {
    } // ~Inst_DS__DS_MAX_RTN_I32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u32")
    {
    } // Inst_DS__DS_MIN_RTN_U32

    Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
    {
    } // ~Inst_DS__DS_MIN_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u32")
    {
    } // Inst_DS__DS_MAX_RTN_U32

    Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
    {
    } // ~Inst_DS__DS_MAX_RTN_U32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b32")
    {
    } // Inst_DS__DS_AND_RTN_B32

    Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
    {
    } // ~Inst_DS__DS_AND_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b32")
    {
    } // Inst_DS__DS_OR_RTN_B32

    Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
    {
    } // ~Inst_DS__DS_OR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b32")
    {
    } // Inst_DS__DS_XOR_RTN_B32

    Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
    {
    } // ~Inst_DS__DS_XOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b32")
    {
    } // Inst_DS__DS_MSKOR_RTN_B32

    Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B32

    Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B32

    Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B32

    // Write-exchange 2 separate dwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B32

    Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32

    // Write-exchange 2 separate dwords with a stride of 64 dwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
    {
    } // Inst_DS__DS_CMPST_RTN_B32

    Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_CMPST_RTN_F32

    Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_RTN_F32

    Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
    {
    } // ~Inst_DS__DS_MIN_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_RTN_F32

    Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
    {
    } // ~Inst_DS__DS_MAX_RTN_F32

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrap_rtn_b32")
    {
    } // Inst_DS__DS_WRAP_RTN_B32

    Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
    {
    } // ~Inst_DS__DS_WRAP_RTN_B32

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_RTN_F32

    Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
    {
    } // ~Inst_DS__DS_ADD_RTN_F32

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B32

    Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
    {
    } // ~Inst_DS__DS_READ_B32

    // RETURN_DATA = MEM[ADDR].
    void
    Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU32>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
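    /**
     * The DS loads and stores above all follow the same three-phase flow:
     * execute() computes per-lane addresses (calcAddr) and queues the
     * instruction on the local memory pipeline; initiateAcc() starts the
     * actual LDS access, with initMemWrite consuming and initMemRead
     * filling the d_data staging buffer; and completeAcc() copies any
     * returned data from d_data into the destination VGPRs. The
     * wrLm/rdLm and outstandingReqs counters track a request across
     * those phases.
     */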
    Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2_B32

    Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32()
    {
    } // ~Inst_DS__DS_READ2_B32

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
    void
    Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 4;
        Addr offset1 = instData.OFFSET1 * 4;

        initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
    Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2st64_b32")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2ST64_B32

    Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32()
    {
    } // ~Inst_DS__DS_READ2ST64_B32

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
    void
    Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = (instData.OFFSET0 * 4 * 64);
        Addr offset1 = (instData.OFFSET1 * 4 * 64);

        initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                // This is a dword (b32) read, so d_data must be indexed
                // with dword-sized elements; a 64-bit element cast here
                // would stride past the data returned for each lane.
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
    // --- Inst_DS__DS_READ_I8 class methods ---

    Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_i8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_I8

    Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8()
    {
    } // ~Inst_DS__DS_READ_I8

    // RETURN_DATA = signext(MEM[ADDR][7:0]).
    // Signed byte read.
    void
    Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_u8")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_U8

    Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8()
    {
    } // ~Inst_DS__DS_READ_U8

    // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
    // Unsigned byte read.
    void
    Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU8>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
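    /**
     * Example of the widening above: ds_read_u8 zero-extends each byte
     * into a 32-bit VGPR element, so a stored byte of 0xFF returns as
     * 0x000000FF. The signed variant (ds_read_i8) would instead
     * sign-extend, returning 0xFFFFFFFF for the same byte.
     */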
    // --- Inst_DS__DS_READ_I16 class methods ---

    Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_i16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_I16

    Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16()
    {
    } // ~Inst_DS__DS_READ_I16

    // RETURN_DATA = signext(MEM[ADDR][15:0]).
    // Signed short read.
    void
    Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_u16")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_U16

    Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16()
    {
    } // ~Inst_DS__DS_READ_U16

    // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
    // Unsigned short read.
    void
    Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU16>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
    // --- Inst_DS__DS_SWIZZLE_B32 class methods ---

    Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_swizzle_b32")
    {
        setFlag(Load);
    } // Inst_DS__DS_SWIZZLE_B32

    Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
    {
    } // ~Inst_DS__DS_SWIZZLE_B32

    // RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
    // Dword swizzle, no data is written to LDS memory.
    void
    Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();

        if (gpuDynInst->exec_mask.none()) {
            wf->decLGKMInstsIssued();
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));

        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);
        /**
         * The "DS pattern" is comprised of both offset fields. That is, the
         * swizzle pattern between lanes. Bit 15 of the DS pattern dictates
         * which swizzle mode to use. There are two different swizzle
         * patterns: 1) QDMode and 2) Bit-masks mode. If bit 15 is set use
         * QDMode else use Bit-masks mode. The remaining bits dictate how to
         * swizzle the lanes.
         *
         * QDMode:    Chunks the lanes into 4s and swizzles among them.
         *            Bits 7:6 dictate where lane 3 (of the current chunk)
         *            gets its data, 5:4 lane 2, etc.
         *
         * Bit-masks: This mode breaks bits 14:0 into 3 equal-sized chunks.
         *            14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
         *            is the and_mask. Each lane is swizzled by performing
         *            the appropriate operation using these masks.
         */
        VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);

        data.read();

        if (bits(ds_pattern, 15)) {
            // QDMode
            for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
                /**
                 * This operation allows data sharing between groups
                 * of four consecutive threads. Note the increment by
                 * 4 in the for loop.
                 */
                if (gpuDynInst->exec_mask[lane]) {
                    int index0 = lane + bits(ds_pattern, 1, 0);
                    panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index0);
                    vdst[lane]
                        = gpuDynInst->exec_mask[index0] ? data[index0] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 1]) {
                    int index1 = lane + bits(ds_pattern, 3, 2);
                    panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index1);
                    vdst[lane + 1]
                        = gpuDynInst->exec_mask[index1] ? data[index1] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 2]) {
                    int index2 = lane + bits(ds_pattern, 5, 4);
                    panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index2);
                    vdst[lane + 2]
                        = gpuDynInst->exec_mask[index2] ? data[index2] : 0;
                }
                if (gpuDynInst->exec_mask[lane + 3]) {
                    int index3 = lane + bits(ds_pattern, 7, 6);
                    panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
                             "is out of bounds.\n", gpuDynInst->disassemble(),
                             index3);
                    vdst[lane + 3]
                        = gpuDynInst->exec_mask[index3] ? data[index3] : 0;
                }
            }
        } else {
            // Bit-masks mode
            int and_mask = bits(ds_pattern, 4, 0);
            int or_mask = bits(ds_pattern, 9, 5);
            int xor_mask = bits(ds_pattern, 14, 10);
            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    int index = (((lane & and_mask) | or_mask) ^ xor_mask);
                    // Adjust for the next 32 lanes.
                    if (lane > 31) {
                        index += 32;
                    }
                    panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
                             "out of bounds.\n", gpuDynInst->disassemble(),
                             index);
                    vdst[lane]
                        = gpuDynInst->exec_mask[index] ? data[index] : 0;
                }
            }
        }

        vdst.write();
    } // execute
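    /**
     * Worked examples for the two swizzle modes above. QDMode: a DS
     * pattern of 0x801B has bit 15 set and selectors 0b00011011, so in
     * every group of four lanes, lane 0 reads lane 3's data, lane 1 reads
     * lane 2's, lane 2 reads lane 1's, and lane 3 reads lane 0's (a quad
     * reversal). Bit-masks mode: and_mask = 0x1F, or_mask = 0x00, and
     * xor_mask = 0x01 give index = ((lane & 0x1F) | 0x00) ^ 0x01, which
     * swaps each even lane with its odd neighbor.
     */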
    // --- Inst_DS__DS_PERMUTE_B32 class methods ---

    Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_permute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_PERMUTE_B32

    Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
    {
    } // ~Inst_DS__DS_PERMUTE_B32

    // Forward permute.
    void
    Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        addr.read();
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                /**
                 * One of the offset fields can be used for the index.
                 * It is assumed OFFSET0 would be used, as OFFSET1 is
                 * typically only used for DS ops that operate on two
                 * disparate pieces of data.
                 */
                assert(!instData.OFFSET1);
                /**
                 * The address provided is a byte address, but VGPRs are
                 * 4 bytes, so we must divide by 4 to get the actual VGPR
                 * index. Additionally, the index is calculated modulo the
                 * WF size, 64 in this case, so we simply extract bits 7-2.
                 */
                int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
                panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
                         "of bounds.\n", gpuDynInst->disassemble(), index);
                /**
                 * If the shuffled index corresponds to a lane that is
                 * inactive then this instruction writes a 0 to the active
                 * lane in VDST.
                 */
                if (wf->execMask(index)) {
                    vdst[index] = data[lane];
                } else {
                    vdst[index] = 0;
                }
            }
        }

        vdst.write();

        wf->decLGKMInstsIssued();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();
    } // execute
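    /**
     * Worked example of the index calculation above: with OFFSET0 = 0 and
     * addr[lane] = 8, bits(8, 7, 2) = 2, so this lane's data is routed to
     * lane 2. Each lane effectively names a destination lane by supplying
     * the byte address 4 * destination_lane.
     */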
    // --- Inst_DS__DS_BPERMUTE_B32 class methods ---

    Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_bpermute_b32")
    {
        setFlag(MemoryRef);
        /**
         * While this operation doesn't actually use DS storage we classify
         * it as a load here because it does a writeback to a VGPR, which
         * fits in better with the LDS pipeline logic.
         */
        setFlag(Load);
    } // Inst_DS__DS_BPERMUTE_B32

    Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
    {
    } // ~Inst_DS__DS_BPERMUTE_B32

    // Backward permute.
    void
    Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()
                                ->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        addr.read();
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                /**
                 * One of the offset fields can be used for the index.
                 * It is assumed OFFSET0 would be used, as OFFSET1 is
                 * typically only used for DS ops that operate on two
                 * disparate pieces of data.
                 */
                assert(!instData.OFFSET1);
                /**
                 * The address provided is a byte address, but VGPRs are
                 * 4 bytes, so we must divide by 4 to get the actual VGPR
                 * index. Additionally, the index is calculated modulo the
                 * WF size, 64 in this case, so we simply extract bits 7-2.
                 */
                int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
                panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
                         "of bounds.\n", gpuDynInst->disassemble(), index);
                /**
                 * If the shuffled index corresponds to a lane that is
                 * inactive then this instruction writes a 0 to the active
                 * lane in VDST.
                 */
                if (wf->execMask(index)) {
                    vdst[lane] = data[index];
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();

        wf->decLGKMInstsIssued();
        wf->rdLmReqsInPipe--;
        wf->validateRequestCounters();
    } // execute
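    /**
     * The two permutes above differ only in the direction of the move:
     * ds_permute_b32 scatters (vdst[index] = data[lane], "push my value
     * to lane index"), while ds_bpermute_b32 gathers (vdst[lane] =
     * data[index], "pull my value from lane index"). Both derive the lane
     * index from a byte address divided by 4 and write 0 when the
     * selected lane is inactive.
     */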
    // --- Inst_DS__DS_ADD_U64 class methods ---

    Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_u64")
    {
    } // Inst_DS__DS_ADD_U64

    Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
    {
    } // ~Inst_DS__DS_ADD_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_u64")
    {
    } // Inst_DS__DS_SUB_U64

    Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64()
    {
    } // ~Inst_DS__DS_SUB_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_u64")
    {
    } // Inst_DS__DS_RSUB_U64

    Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64()
    {
    } // ~Inst_DS__DS_RSUB_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_u64")
    {
    } // Inst_DS__DS_INC_U64

    Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64()
    {
    } // ~Inst_DS__DS_INC_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_u64")
    {
    } // Inst_DS__DS_DEC_U64

    Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64()
    {
    } // ~Inst_DS__DS_DEC_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_i64")
    {
    } // Inst_DS__DS_MIN_I64

    Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64()
    {
    } // ~Inst_DS__DS_MIN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_i64")
    {
    } // Inst_DS__DS_MAX_I64

    Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64()
    {
    } // ~Inst_DS__DS_MAX_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_u64")
    {
    } // Inst_DS__DS_MIN_U64

    Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64()
    {
    } // ~Inst_DS__DS_MIN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_u64")
    {
    } // Inst_DS__DS_MAX_U64

    Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64()
    {
    } // ~Inst_DS__DS_MAX_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_b64")
    {
    } // Inst_DS__DS_AND_B64

    Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64()
    {
    } // ~Inst_DS__DS_AND_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_b64")
    {
    } // Inst_DS__DS_OR_B64

    Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64()
    {
    } // ~Inst_DS__DS_OR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_b64")
    {
    } // Inst_DS__DS_XOR_B64

    Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64()
    {
    } // ~Inst_DS__DS_XOR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_b64")
    {
    } // Inst_DS__DS_MSKOR_B64

    Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64()
    {
    } // ~Inst_DS__DS_MSKOR_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    void
    Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B64

    Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64()
    {
    } // ~Inst_DS__DS_WRITE_B64

    // MEM[ADDR] = DATA.
    void
    Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA0);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemWrite<VecElemU64>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2_B64

    Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64()
    {
    } // ~Inst_DS__DS_WRITE2_B64

    // MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
    void
    Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
        ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);

        addr.read();
        data0.read();
        data1.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (wf->execMask(lane)) {
                (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2] = data0[lane];
                (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
            }
        }

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 8;
        Addr offset1 = instData.OFFSET1 * 8;

        initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
    Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write2st64_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE2ST64_B64

    Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64()
    {
    } // ~Inst_DS__DS_WRITE2ST64_B64

    // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
    // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2;
    void
    Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_b64")
    {
    } // Inst_DS__DS_CMPST_B64

    Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64()
    {
    } // ~Inst_DS__DS_CMPST_B64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_CMPST_F64

    Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64()
    {
    } // ~Inst_DS__DS_CMPST_F64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_F64

    Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64()
    {
    } // ~Inst_DS__DS_MIN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_F64

    Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64()
    {
    } // ~Inst_DS__DS_MAX_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_rtn_u64")
    {
    } // Inst_DS__DS_ADD_RTN_U64

    Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64()
    {
    } // ~Inst_DS__DS_ADD_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_rtn_u64")
    {
    } // Inst_DS__DS_SUB_RTN_U64

    Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64()
    {
    } // ~Inst_DS__DS_SUB_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_rtn_u64")
    {
    } // Inst_DS__DS_RSUB_RTN_U64

    Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64()
    {
    } // ~Inst_DS__DS_RSUB_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA - MEM[ADDR];
    // RETURN_DATA = tmp.
    // Subtraction with reversed operands.
    void
    Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_rtn_u64")
    {
    } // Inst_DS__DS_INC_RTN_U64

    Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64()
    {
    } // ~Inst_DS__DS_INC_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_rtn_u64")
    {
    } // Inst_DS__DS_DEC_RTN_U64

    Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64()
    {
    } // ~Inst_DS__DS_DEC_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_i64")
    {
    } // Inst_DS__DS_MIN_RTN_I64

    Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64()
    {
    } // ~Inst_DS__DS_MIN_RTN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_i64")
    {
    } // Inst_DS__DS_MAX_RTN_I64

    Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64()
    {
    } // ~Inst_DS__DS_MAX_RTN_I64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_u64")
    {
    } // Inst_DS__DS_MIN_RTN_U64

    Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64()
    {
    } // ~Inst_DS__DS_MIN_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_u64")
    {
    } // Inst_DS__DS_MAX_RTN_U64

    Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64()
    {
    } // ~Inst_DS__DS_MAX_RTN_U64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_rtn_b64")
    {
    } // Inst_DS__DS_AND_RTN_B64

    Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64()
    {
    } // ~Inst_DS__DS_AND_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_rtn_b64")
    {
    } // Inst_DS__DS_OR_RTN_B64

    Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64()
    {
    } // ~Inst_DS__DS_OR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_rtn_b64")
    {
    } // Inst_DS__DS_XOR_RTN_B64

    Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64()
    {
    } // ~Inst_DS__DS_XOR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_mskor_rtn_b64")
    {
    } // Inst_DS__DS_MSKOR_RTN_B64

    Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64()
    {
    } // ~Inst_DS__DS_MSKOR_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
    // RETURN_DATA = tmp.
    // Masked dword OR, D0 contains the mask and D1 contains the new value.
    void
    Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG_RTN_B64

    Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG_RTN_B64

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    // Write-exchange operation.
    void
    Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2_RTN_B64

    Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2_RTN_B64

    // Write-exchange 2 separate qwords.
    void
    Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
    {
    } // Inst_DS__DS_WRXCHG2ST64_RTN_B64

    Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64()
    {
    } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64

    // Write-exchange 2 qwords with a stride of 64 qwords.
    void
    Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
    {
    } // Inst_DS__DS_CMPST_RTN_B64

    Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_B64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    // Compare and store.
    void
    Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_CMPST_RTN_F64

    Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64()
    {
    } // ~Inst_DS__DS_CMPST_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA2;
    // cmp = DATA;
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_RTN_F64

    Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64()
    {
    } // ~Inst_DS__DS_MIN_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (cmp < tmp) ? src : tmp.
    void
    Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_rtn_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_RTN_F64

    Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64()
    {
    } // ~Inst_DS__DS_MAX_RTN_F64

    // tmp = MEM[ADDR];
    // src = DATA;
    // cmp = DATA2;
    // MEM[ADDR] = (tmp > cmp) ? src : tmp.
    void
    Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
    Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B64

    Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64()
    {
    } // ~Inst_DS__DS_READ_B64

    // RETURN_DATA = MEM[ADDR].
    void
    Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0;
        Addr offset1 = instData.OFFSET1;
        Addr offset = (offset1 << 8) | offset0;

        initMemRead<VecElemU64>(gpuDynInst, offset);
    } // initiateAcc

    void
    Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    } // completeAcc
    Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2_B64

    Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64()
    {
    } // ~Inst_DS__DS_READ2_B64

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
    void
    Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = instData.OFFSET0 * 8;
        Addr offset1 = instData.OFFSET1 * 8;

        initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst0(gpuDynInst, extData.VDST);
        VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
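    /**
     * Note the destination register spacing in the dual reads: the b32
     * variants place their second result at VDST + 1, while the b64
     * variants use VDST + 2, since each 64-bit element occupies a pair of
     * consecutive 32-bit VGPRs.
     */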
    Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read2st64_b64")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ2ST64_B64

    Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64()
    {
    } // ~Inst_DS__DS_READ2ST64_B64

    // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
    // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
    void
    Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(
            gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
        ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);

        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        Addr offset0 = (instData.OFFSET0 * 8 * 64);
        Addr offset1 = (instData.OFFSET1 * 8 * 64);

        initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
    } // initiateAcc

    void
    Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst0(gpuDynInst, extData.VDST);
        VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2];
                vdst1[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane * 2 + 1];
            }
        }

        vdst0.write();
        vdst1.write();
    } // completeAcc
    Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64(
          InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
    {
    } // Inst_DS__DS_CONDXCHG32_RTN_B64

    Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64()
    {
    } // ~Inst_DS__DS_CONDXCHG32_RTN_B64

    // Conditional write exchange.
    void
    Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u32")
    {
    } // Inst_DS__DS_ADD_SRC2_U32

    Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }
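
    // Decoding the SRC2 "B" address in the spec comment above (a worked
    // example, not normative ISA text): offset1[7] selects between an
    // address derived from A's own upper bits and a 16-bit immediate
    // {offset1[6],offset1[6:0],offset0} scaled by 4. For instance, with
    // offset1 = 0x00 and offset0 = 0x10 the immediate is 0x0010, so
    // B = A + 4 * 0x10 = A + 64, and this (unimplemented) operation would
    // compute MEM[A] += MEM[A + 64]. The same addressing applies to all
    // of the *_src2_* instructions below.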

    Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u32")
    {
    } // Inst_DS__DS_SUB_SRC2_U32

    Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u32")
    {
    } // Inst_DS__DS_RSUB_SRC2_U32

    Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u32")
    {
    } // Inst_DS__DS_INC_SRC2_U32

    Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32()
    {
    } // ~Inst_DS__DS_INC_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u32")
    {
    } // Inst_DS__DS_DEC_SRC2_U32

    Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    void
    Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i32")
    {
    } // Inst_DS__DS_MIN_SRC2_I32

    Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i32")
    {
    } // Inst_DS__DS_MAX_SRC2_I32

    Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u32")
    {
    } // Inst_DS__DS_MIN_SRC2_U32

    Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u32")
    {
    } // Inst_DS__DS_MAX_SRC2_U32

    Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b32")
    {
    } // Inst_DS__DS_AND_SRC2_B32

    Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
    {
    } // ~Inst_DS__DS_AND_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b32")
    {
    } // Inst_DS__DS_OR_SRC2_B32

    Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
    {
    } // ~Inst_DS__DS_OR_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b32")
    {
    } // Inst_DS__DS_XOR_SRC2_B32

    Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b32")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B32

    Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    // Write dword.
    void
    Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MIN_SRC2_F32

    Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_MAX_SRC2_F32

    Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_f32")
    {
        setFlag(F32);
    } // Inst_DS__DS_ADD_SRC2_F32

    Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
    {
    } // ~Inst_DS__DS_ADD_SRC2_F32

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] + MEM[A].
    void
    Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
            InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_release_all")
    {
    } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
    {
    } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL

    void
    Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_init")
    {
    } // Inst_DS__DS_GWS_INIT

    Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
    {
    } // ~Inst_DS__DS_GWS_INIT

    void
    Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_v")
    {
    } // Inst_DS__DS_GWS_SEMA_V

    Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
    {
    } // ~Inst_DS__DS_GWS_SEMA_V

    void
    Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_br")
    {
    } // Inst_DS__DS_GWS_SEMA_BR

    Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
    {
    } // ~Inst_DS__DS_GWS_SEMA_BR

    void
    Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_sema_p")
    {
    } // Inst_DS__DS_GWS_SEMA_P

    Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
    {
    } // ~Inst_DS__DS_GWS_SEMA_P

    void
    Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_gws_barrier")
    {
    } // Inst_DS__DS_GWS_BARRIER

    Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
    {
    } // ~Inst_DS__DS_GWS_BARRIER

    void
    Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_consume")
    {
    } // Inst_DS__DS_CONSUME

    Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
    {
    } // ~Inst_DS__DS_CONSUME

    void
    Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_append")
    {
    } // Inst_DS__DS_APPEND

    Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
    {
    } // ~Inst_DS__DS_APPEND

    void
    Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_ordered_count")
    {
    } // Inst_DS__DS_ORDERED_COUNT

    Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
    {
    } // ~Inst_DS__DS_ORDERED_COUNT

    void
    Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_add_src2_u64")
    {
    } // Inst_DS__DS_ADD_SRC2_U64

    Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
    {
    } // ~Inst_DS__DS_ADD_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] + MEM[B].
    void
    Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_sub_src2_u64")
    {
    } // Inst_DS__DS_SUB_SRC2_U64

    Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_SUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] - MEM[B].
    void
    Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_rsub_src2_u64")
    {
    } // Inst_DS__DS_RSUB_SRC2_U64

    Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
    {
    } // ~Inst_DS__DS_RSUB_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B] - MEM[A].
    void
    Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_inc_src2_u64")
    {
    } // Inst_DS__DS_INC_SRC2_U64

    Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
    {
    } // ~Inst_DS__DS_INC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
    void
    Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_dec_src2_u64")
    {
    } // Inst_DS__DS_DEC_SRC2_U64

    Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
    {
    } // ~Inst_DS__DS_DEC_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
    void
    Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_i64")
    {
    } // Inst_DS__DS_MIN_SRC2_I64

    Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_i64")
    {
    } // Inst_DS__DS_MAX_SRC2_I64

    Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_I64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_u64")
    {
    } // Inst_DS__DS_MIN_SRC2_U64

    Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = min(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_u64")
    {
    } // Inst_DS__DS_MAX_SRC2_U64

    Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_U64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = max(MEM[A], MEM[B]).
    void
    Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_and_src2_b64")
    {
    } // Inst_DS__DS_AND_SRC2_B64

    Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64()
    {
    } // ~Inst_DS__DS_AND_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] & MEM[B].
    void
    Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_or_src2_b64")
    {
    } // Inst_DS__DS_OR_SRC2_B64

    Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64()
    {
    } // ~Inst_DS__DS_OR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] | MEM[B].
    void
    Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_xor_src2_b64")
    {
    } // Inst_DS__DS_XOR_SRC2_B64

    Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64()
    {
    } // ~Inst_DS__DS_XOR_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[A] ^ MEM[B].
    void
    Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_src2_b64")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_SRC2_B64

    Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64()
    {
    } // ~Inst_DS__DS_WRITE_SRC2_B64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = MEM[B].
    // Write qword.
    void
    Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_min_src2_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MIN_SRC2_F64

    Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64()
    {
    } // ~Inst_DS__DS_MIN_SRC2_F64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_max_src2_f64")
    {
        setFlag(F64);
    } // Inst_DS__DS_MAX_SRC2_F64

    Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64()
    {
    } // ~Inst_DS__DS_MAX_SRC2_F64

    // A = ADDR_BASE;
    // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
    // {offset1[6],offset1[6:0],offset0});
    // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
    void
    Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b96")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B96

    Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96()
    {
    } // ~Inst_DS__DS_WRITE_B96

    // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
    // Tri-dword write.
    void
    Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_write_b128")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_DS__DS_WRITE_B128

    Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128()
    {
    } // ~Inst_DS__DS_WRITE_B128

    // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
    void
    Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b96")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B96

    Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96()
    {
    } // ~Inst_DS__DS_READ_B96

    // Tri-dword read.
    void
    Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
        : Inst_DS(iFmt, "ds_read_b128")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_DS__DS_READ_B128

    Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128()
    {
    } // ~Inst_DS__DS_READ_B128

    void
    Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X

    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY

    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ

    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW

    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X

    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY

    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ

    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X

    // Untyped buffer load 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY

    // Untyped buffer load 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ

    // Untyped buffer load 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW

    // Untyped buffer load 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X

    // Untyped buffer store 1 dword with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY

    // Untyped buffer store 2 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ

    // Untyped buffer store 3 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW

    // Untyped buffer store 4 dwords with format conversion.
    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_UBYTE
        ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_UBYTE
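
    // When the instruction's LDS bit is set, the loaded data is returned
    // to LDS rather than to a VGPR, so the access is tagged as a group
    // segment reference; otherwise it is an ordinary global segment load.
    // The same selection recurs in the buffer loads and stores below.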

    Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE

    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }
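
    // The four calcAddr cases above cover the MUBUF addressing modes: the
    // IDXEN and OFFEN bits each add one VGPR to the address calculation,
    // so VADDR supplies (index, offset) when both are set and the single
    // enabled value otherwise. Note how the operand order passed to
    // calcAddr flips to (addr1, addr0) once IDXEN is set, which is how
    // the helper tells the index VGPR apart from the offset VGPR; only
    // the VGPRs a mode actually consumes are read().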

    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU8>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }
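
    // oobMask flags lanes whose computed address fell outside the bounds
    // of the buffer described by the resource descriptor; following the
    // buffer-semantics convention this model implements, out-of-bounds
    // lanes return 0 instead of faulting.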

    Inst_MUBUF__BUFFER_LOAD_SBYTE
        ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SBYTE

    Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE

    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_USHORT
        ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_USHORT

    Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_USHORT

    // Untyped buffer load unsigned short (zero extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
                        gpuDynInst->d_data))[lane]);
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }

    Inst_MUBUF__BUFFER_LOAD_SSHORT
        ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_sshort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_LOAD_SSHORT

    Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT

    // Untyped buffer load signed short (sign extend to VGPR destination).
    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_LOAD_DWORD
        ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORD

    Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORD

    // Untyped buffer load dword.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU32>(gpuDynInst);
    }
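
    // Note: initMemRead is instantiated two ways in this file: with an
    // element type (initMemRead<VecElemU8/U16/U32>) when each lane loads a
    // single value, and with an integer dword count (initMemRead<2>, <3>,
    // <4>) for the multi-dword loads below, which stage all of a lane's
    // dwords contiguously in d_data.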

    void
    Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDATA);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                } else {
                    vdst[lane] = 0;
                }
            }
        }

        vdst.write();
    }

    Inst_MUBUF__BUFFER_LOAD_DWORDX2
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX2

    Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2

    // Untyped buffer load 2 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<2>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 2 + 1];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
    }

    Inst_MUBUF__BUFFER_LOAD_DWORDX3
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX3

    Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3

    // Untyped buffer load 3 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<3>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 3 + 2];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
    }

    Inst_MUBUF__BUFFER_LOAD_DWORDX4
        ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_LOAD_DWORDX4

    Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4

    // Untyped buffer load 4 dwords.
    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdLmReqsInPipe--;
            wf->outstandingReqsRdLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    }

    void
    Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
        VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                if (!oobMask[lane]) {
                    vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4];
                    vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 1];
                    vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 2];
                    vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane * 4 + 3];
                } else {
                    vdst0[lane] = 0;
                    vdst1[lane] = 0;
                    vdst2[lane] = 0;
                    vdst3[lane] = 0;
                }
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    }
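
    // A dwordx4 result occupies four consecutive VGPRs, so completeAcc
    // scatters d_data element [lane * 4 + i] into VDATA + i for i in 0..3,
    // zeroing all four destinations for out-of-bounds lanes.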

    Inst_MUBUF__BUFFER_STORE_BYTE
        ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_byte")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_BYTE

    Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_BYTE

    // Untyped buffer store byte.
    void
    Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemI8>(gpuDynInst);
    }
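
    // Stores run the copy in the opposite direction from loads: initiateAcc
    // gathers each active lane's register value into the instruction's
    // d_data staging buffer and then calls initMemWrite to issue the actual
    // memory access, while completeAcc is empty because a store needs no
    // register writeback.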

    void
    Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_MUBUF__BUFFER_STORE_SHORT
        ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_short")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_SHORT

    Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_SHORT

    // Untyped buffer store short.
    void
    Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            addr0.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            addr0.read();
            addr1.read();
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandI16 data(gpuDynInst, extData.VDATA);
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemI16>(gpuDynInst);
    }
35911 Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst
)
35915 Inst_MUBUF__BUFFER_STORE_DWORD::
35916 Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF
*iFmt
)
35917 : Inst_MUBUF(iFmt
, "buffer_store_dword")
35919 setFlag(MemoryRef
);
35921 if (instData
.LDS
) {
35922 setFlag(GroupSegment
);
35924 setFlag(GlobalSegment
);
35926 } // Inst_MUBUF__BUFFER_STORE_DWORD
35928 Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
35930 } // ~Inst_MUBUF__BUFFER_STORE_DWORD

    // Untyped buffer store dword.
    void
    Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);

        rsrcDesc.read();
        offset.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data(gpuDynInst, extData.VDATA);
        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU32>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_DWORDX2
        ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX2

    Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2

    // Untyped buffer store 2 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1]
                    = data1[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<2>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
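
    // Reader's note: unlike buffer_store_dword, which stages VDATA into
    // d_data in initiateAcc(), the x2/x3/x4 variants stage their dwords in
    // execute() (the loop above), spaced four dword slots per lane, and
    // initiateAcc() only starts the memory write via initMemWrite<N>.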

    Inst_MUBUF__BUFFER_STORE_DWORDX3
        ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX3

    Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3

    // Untyped buffer store 3 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2]
                    = data2[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<3>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_DWORDX4
        ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        if (instData.LDS) {
            setFlag(GroupSegment);
        } else {
            setFlag(GlobalSegment);
        }
    } // Inst_MUBUF__BUFFER_STORE_DWORDX4

    Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4

    // Untyped buffer store 4 dwords.
    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
        ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
        ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
        ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
        ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
        ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);

        rsrcDesc.read();
        offset.read();
        data0.read();
        data1.read();
        data2.read();
        data3.read();

        int inst_offset = instData.OFFSET;

        if (!instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (!instData.IDXEN && instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr0, addr1, rsrcDesc, offset, inst_offset);
        } else if (instData.IDXEN && !instData.OFFEN) {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        } else {
            calcAddr<ConstVecOperandU32, ConstVecOperandU32,
                ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
                    addr1, addr0, rsrcDesc, offset, inst_offset);
        }

        if (isLocalMem()) {
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrLmReqsInPipe--;
            wf->outstandingReqsWrLm++;
        } else {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        }

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2]
                    = data2[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3]
                    = data3[lane];
            }
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemWrite<4>(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD
        ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
    {
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
    {
    } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD

    // Store one DWORD from LDS memory to system memory without utilizing
    // VGPRs.
    void
    Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1")
    {
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1

    Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1

    // Write back and invalidate the shader L1.
    // Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;

            wf->outstandingReqsWrGm++;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
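
    // Reader's note: the cache writeback-invalidate is charged against
    // both the read and the write global-memory counters in execute()
    // above, presumably because the fence must order both kinds of
    // outstanding access; it still counts as a single request in
    // wf->outstandingReqs.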

    Inst_MUBUF__BUFFER_WBINVL1_VOL
        ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol") {
        /**
         * This instruction is the same as buffer_wbinvl1, except that it
         * only invalidates L1 shader lines whose MTYPE marks them for
         * system or group coherence. Since the L1 does not differentiate
         * between its cache lines, this instruction currently behaves
         * (and is implemented) exactly like buffer_wbinvl1.
         */
        setFlag(MemoryRef);
        setFlag(GPUStaticInst::MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1_VOL

    Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
    {
    } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL

    // Write back and invalidate the shader L1 only for lines that are marked
    // volatile. Always returns ACK to shader.
    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();
        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;

            wf->outstandingReqsWrGm++;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    } // execute

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MUBUF__BUFFER_ATOMIC_SWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP

    Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
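
    // Reader's note for buffer_atomic_swap above and all buffer atomics
    // that follow: the GLC bit selects the ISA variant. GLC=1 returns the
    // pre-operation memory value to the destination VGPRs (AtomicReturn);
    // GLC=0 returns nothing (AtomicNoReturn).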

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_ADD
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD

    Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SUB
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB

    Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMIN

    Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMIN
        ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMIN

    Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMAX

    Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMAX
        ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMAX

    Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_AND
        ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_AND

    Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_OR
        ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_OR

    Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_XOR
        ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_XOR

    Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_INC
        ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_INC

    Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
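
    // Worked example of the wrapping increment: with DATA = 3 and
    // MEM[ADDR] initially 0, successive buffer_atomic_inc operations
    // store 1, 2, 3, 0, 1, ...; the cell counts modulo DATA + 1.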

    Inst_MUBUF__BUFFER_ATOMIC_DEC
        ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_DEC

    Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
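
    // Worked example of the wrapping decrement: with DATA = 3 and
    // MEM[ADDR] initially 0, successive buffer_atomic_dec operations
    // store 3, 2, 1, 0, 3, ...; the wrap cases (tmp == 0 or tmp > DATA)
    // reload DATA, making this the mirror image of buffer_atomic_inc.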

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
        ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2

    // tmp = MEM[ADDR];
    // src = DATA[0:1];
    // cmp = DATA[2:3];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2

    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2

    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2

    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2

    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_AND_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2

    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_OR_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2

    Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2

    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_INC_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2

    Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
        ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2

    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
    {
    } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X

    // Typed buffer load 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY

    // Typed buffer load 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ

    // Typed buffer load 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW

    // Typed buffer load 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_X
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X

    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X

    // Typed buffer store 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY

    // Typed buffer store 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ

    // Typed buffer store 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW

    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::
        ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X

    // Typed buffer load 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY

    // Typed buffer load 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ

    // Typed buffer load 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW

    // Typed buffer load 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X

    // Typed buffer store 1 dword with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY

    // Typed buffer store 2 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ

    // Typed buffer store 3 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
        : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
        ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
    {
    } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW

    // Typed buffer store 4 dwords with format conversion.
    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
        GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
        GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD

    Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
    {
    } // ~Inst_MIMG__IMAGE_LOAD

    // Image memory load with format conversion specified in T#. No sampler.
    void
    Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP

    Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP

    void
    Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK

    Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK

    void
    Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN

    // Image memory load with no format conversion and sign extension.
    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK

    Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK

    // Image memory load with user-supplied mip level, no format conversion.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
    {
        setFlag(MemoryRef);
        setFlag(Load);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
    {
    } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN

    // Image memory load with user-supplied mip level, no format conversion,
    // and sign extension.
    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE

    Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
    {
    } // ~Inst_MIMG__IMAGE_STORE

    // Image memory store with format conversion specified in T#. No sampler.
    void
    Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP

    Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP

    void
    Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_PCK

    Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_PCK

    // Image memory store of packed data without format conversion.
    void
    Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_store_mip_pck")
    {
        setFlag(MemoryRef);
        setFlag(Store);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_STORE_MIP_PCK

    Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
    {
    } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK

    // Image memory store of packed data without format conversion.
    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    } // initiateAcc

    void
    Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc

    Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_resinfo")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_RESINFO

    Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
    {
    } // ~Inst_MIMG__IMAGE_GET_RESINFO

    void
    Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SWAP

    Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    } // execute
38030 Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG
*iFmt
)
38031 : Inst_MIMG(iFmt
, "image_atomic_add")
38033 setFlag(AtomicAdd
);
38034 if (instData
.GLC
) {
38035 setFlag(AtomicReturn
);
38037 setFlag(AtomicNoReturn
);
38039 setFlag(MemoryRef
);
38040 setFlag(GlobalSegment
);
38041 } // Inst_MIMG__IMAGE_ATOMIC_ADD
38043 Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
38045 } // ~Inst_MIMG__IMAGE_ATOMIC_ADD
38047 // tmp = MEM[ADDR];
38048 // MEM[ADDR] += DATA;
38049 // RETURN_DATA = tmp.
38051 Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst
)
38053 panicUnimplemented();
38056 Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG
*iFmt
)
38057 : Inst_MIMG(iFmt
, "image_atomic_sub")
38059 setFlag(AtomicSub
);
38060 if (instData
.GLC
) {
38061 setFlag(AtomicReturn
);
38063 setFlag(AtomicNoReturn
);
38065 setFlag(MemoryRef
);
38066 setFlag(GlobalSegment
);
38067 } // Inst_MIMG__IMAGE_ATOMIC_SUB
38069 Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
38071 } // ~Inst_MIMG__IMAGE_ATOMIC_SUB
38073 // tmp = MEM[ADDR];
38074 // MEM[ADDR] -= DATA;
38075 // RETURN_DATA = tmp.
38077 Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst
)
38079 panicUnimplemented();
38082 Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
38084 : Inst_MIMG(iFmt
, "image_atomic_smin")
38086 setFlag(AtomicMin
);
38087 if (instData
.GLC
) {
38088 setFlag(AtomicReturn
);
38090 setFlag(AtomicNoReturn
);
38092 setFlag(MemoryRef
);
38093 setFlag(GlobalSegment
);
38094 } // Inst_MIMG__IMAGE_ATOMIC_SMIN
38096 Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
38098 } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN
38100 // tmp = MEM[ADDR];
38101 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
38102 // RETURN_DATA = tmp.
38104 Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst
)
38106 panicUnimplemented();
38109 Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
38111 : Inst_MIMG(iFmt
, "image_atomic_umin")
38113 setFlag(AtomicMin
);
38114 if (instData
.GLC
) {
38115 setFlag(AtomicReturn
);
38117 setFlag(AtomicNoReturn
);
38119 setFlag(MemoryRef
);
38120 setFlag(GlobalSegment
);
38121 } // Inst_MIMG__IMAGE_ATOMIC_UMIN
38123 Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
38125 } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN
38127 // tmp = MEM[ADDR];
38128 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
38129 // RETURN_DATA = tmp.
38131 Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst
)
38133 panicUnimplemented();
    Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_SMAX

    Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_UMAX

    Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_AND

    Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_OR

    Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_XOR

    Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_INC

    Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
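    // For example, with DATA == 3 successive increments walk the memory
    // value 0 -> 1 -> 2 -> 3 -> 0, wrapping to zero once tmp reaches DATA.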
    void
    Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_ATOMIC_DEC

    Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
    {
    } // ~Inst_MIMG__IMAGE_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
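    // For example, with DATA == 3 successive decrements walk the memory
    // value 3 -> 2 -> 1 -> 0 -> 3, reloading DATA at zero or when tmp
    // exceeds DATA.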
    void
    Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE

    Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE

    void
    Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

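    // The sample/gather variants that follow are identical stubs: each
    // constructor only sets GlobalSegment, each destructor is empty, and
    // each execute() panics as unimplemented; only the opcode names differ.
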
    Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL

    Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D

    Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D

    void
    Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL

    Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L

    Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L

    void
    Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B

    Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B

    void
    Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL

    Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ

    Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C

    Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C

    void
    Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D

    Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L

    Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B

    Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ

    Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ

    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_O

    Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_O

    void
    Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_O

    Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_L_O

    Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_O

    Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_LZ_O

    Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O

    void
    Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_O

    Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_L_O

    Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_O

    Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4

    Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4

    void
    Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_CL

    Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_CL

    void
    Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_L

    Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_L

    void
    Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B

    Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B

    void
    Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_CL

    Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_CL

    void
    Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_LZ

    Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_LZ

    void
    Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C

    Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C

    void
    Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_CL

    Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_CL

    void
    Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_l")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_L

    Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_L

    void
    Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B

    Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B

    void
    Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_CL

    Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_lz")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_LZ

    Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ

    void
    Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_O

    Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_O

    void
    Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_CL_O

    Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_L_O

    Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_L_O

    void
    Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_O

    Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_O

    void
    Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_B_CL_O

    Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_LZ_O

    Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O

    void
    Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_O

    Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_CL_O

    Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_l_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_L_O

    Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_O

    Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_gather4_c_lz_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O

    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O()
    {
    } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O

    void
    Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_get_lod")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_GET_LOD

    Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD()
    {
    } // ~Inst_MIMG__IMAGE_GET_LOD

    void
    Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD

    Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD

    void
    Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_CL

    Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD

    Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_cl")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_O

    Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_cd_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O

    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(
          InFmt_MIMG *iFmt)
        : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o")
    {
        setFlag(GlobalSegment);
    } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O()
    {
    } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O

    void
    Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt)
        : Inst_EXP(iFmt, "exp")
    {
    } // Inst_EXP__EXP

    Inst_EXP__EXP::~Inst_EXP__EXP()
    {
    } // ~Inst_EXP__EXP

    void
    Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_ubyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_UBYTE

    Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE()
    {
    } // ~Inst_FLAT__FLAT_LOAD_UBYTE

    // Untyped buffer load unsigned byte (zero extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

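        // With an all-zero exec mask the load touches no memory: undo the
        // issue-time bookkeeping and schedule the (empty) destination
        // write so dependent instructions are still released.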
        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU8>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    }

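    // The remaining flat loads follow the same execute/initiateAcc/
    // completeAcc pattern as flat_load_ubyte; only the memory element type
    // and the per-lane widening in completeAcc differ.
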
    // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---

    Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_sbyte")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_SBYTE

    Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE()
    {
    } // ~Inst_FLAT__FLAT_LOAD_SBYTE

    // Untyped buffer load signed byte (sign extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemI8>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandI32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemI32)((reinterpret_cast<VecElemI8*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    }

    Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_ushort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_USHORT

    Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT()
    {
    } // ~Inst_FLAT__FLAT_LOAD_USHORT

    // Untyped buffer load unsigned short (zero extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU16>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
                    gpuDynInst->d_data))[lane]);
            }
        }

        vdst.write();
    }

    Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_sshort")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_SSHORT

    Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT()
    {
    } // ~Inst_FLAT__FLAT_LOAD_SSHORT

    // Untyped buffer load signed short (sign extend to VGPR destination).
    void
    Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    void
    Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    void
    Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dword")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORD

    Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORD

    // Untyped buffer load dword.
    void
    Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }

    Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX2

    Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX2

    // Untyped buffer load 2 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU64 vdst(gpuDynInst, extData.VDST);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst[lane] = (reinterpret_cast<VecElemU64*>(
                    gpuDynInst->d_data))[lane];
            }
        }

        vdst.write();
    }

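    // For the multi-dword flat loads and stores below, d_data holds each
    // lane's dwords contiguously (lane-major), so lane N's i-th dword sits
    // at index N * width + i.
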
    Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX3

    Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX3

    // Untyped buffer load 3 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<3>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 1];
                vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 2];
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
    }

    Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_load_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Load);
    } // Inst_FLAT__FLAT_LOAD_DWORDX4

    Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4()
    {
    } // ~Inst_FLAT__FLAT_LOAD_DWORDX4

    // Untyped buffer load 4 dwords.
    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->rdGmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            gpuDynInst->exec_mask = wf->execMask();
            wf->computeUnit->vrf[wf->simdId]->
                scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initMemRead<4>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        VecOperandU32 vdst0(gpuDynInst, extData.VDST);
        VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
        VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
        VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                vdst0[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4];
                vdst1[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1];
                vdst2[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2];
                vdst3[lane] = (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3];
            }
        }

        vdst0.write();
        vdst1.write();
        vdst2.write();
        vdst3.write();
    }

    Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_byte")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_BYTE

    Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE()
    {
    } // ~Inst_FLAT__FLAT_STORE_BYTE

    // Untyped buffer store byte.
    void
    Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU8 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU8>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

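    // Flat stores mirror the load pattern: initiateAcc packs the source
    // VGPR data per lane into d_data before calling initMemWrite, and
    // completeAcc is empty because a store returns nothing to the register
    // file.
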
    Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_short")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_SHORT

    Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT()
    {
    } // ~Inst_FLAT__FLAT_STORE_SHORT

    // Untyped buffer store short.
    void
    Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU16 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU16>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dword")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORD

    Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORD

    // Untyped buffer store dword.
    void
    Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx2")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX2

    Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX2

    // Untyped buffer store 2 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        wf->outstandingReqs++;
        wf->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
                    = data[lane];
            }
        }

        initMemWrite<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx3")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX3

    Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX3

    // Untyped buffer store 3 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);

        data0.read();
        data1.read();
        data2.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3] = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
            }
        }

        initMemWrite<3>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }

    Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4(
          InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_store_dwordx4")
    {
        setFlag(MemoryRef);
        setFlag(Store);
    } // Inst_FLAT__FLAT_STORE_DWORDX4

    Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4()
    {
    } // ~Inst_FLAT__FLAT_STORE_DWORDX4

    // Untyped buffer store 4 dwords.
    void
    Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
        ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
        ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
        ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);

        data0.read();
        data1.read();
        data2.read();
        data3.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4] = data0[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
                (reinterpret_cast<VecElemU32*>(
                    gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
            }
        }

        initMemWrite<4>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    }


    Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_swap")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SWAP

    Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SWAP
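
    /**
     * Illustrative sketch, not referenced by the decoder: every FLAT atomic
     * constructor in this file keys its return behaviour off the GLC bit of
     * the encoding. GLC=1 makes the atomic return the pre-op memory value
     * to VDST (AtomicReturn); GLC=0 makes it fire-and-forget
     * (AtomicNoReturn). The enum and function names are hypothetical.
     */
    enum class ExampleAtomicKind { Returning, NonReturning };

    static inline ExampleAtomicKind
    exampleClassifyAtomic(bool glcBit)
    {
        return glcBit ? ExampleAtomicKind::Returning
                      : ExampleAtomicKind::NonReturning;
    }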

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);

        addr.read();

        calcAddr(gpuDynInst, addr);

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            // TODO: additional address computation required for scratch
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();

        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        data.read();

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
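
    /**
     * Illustrative sketch, not referenced by the simulator: for a returning
     * atomic, completeAcc copies the pre-op memory values that the memory
     * system deposited in d_data back into the destination vector register,
     * one dword per lane. Hypothetical signature; the real code also
     * honours the exec mask.
     */
    static inline void
    exampleAtomicWriteback(VecElemU32 *vdst, const VecElemU32 *d_data)
    {
        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            vdst[lane] = d_data[lane]; // old value observed by this lane
        }
    }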

    // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP
        ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_cmpswap")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP

    // tmp = MEM[ADDR];
    // src = DATA[0];
    // cmp = DATA[1];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);
        ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);

        addr.read();
        data.read();
        cmp.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
                    = data[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = cmp[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            /**
             * TODO: If you encounter this panic, just remove this panic
             * and restart the simulation. It should just work fine but
             * this is to warn user that this path is never tested although
             * all the necessary logic is implemented
             */
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
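
    /**
     * Illustrative sketch, not referenced by the simulator: the per-lane
     * semantics of flat_atomic_cmpswap. The swap source comes from DATA
     * (staged in x_data above) and the compare value from DATA + 1 (staged
     * in a_data); memory is updated only on a match, and the old value is
     * returned either way. The helper name is hypothetical.
     */
    static inline VecElemU32
    exampleCmpSwapLane(VecElemU32 &mem, VecElemU32 src, VecElemU32 cmp)
    {
        VecElemU32 tmp = mem;           // tmp = MEM[ADDR]
        mem = (tmp == cmp) ? src : tmp; // swap only when the compare hits
        return tmp;                     // RETURN_DATA[0] = tmp
    }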

    Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_add")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_ADD

    Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_ADD

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_sub")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SUB

    Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SUB

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMIN

    Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umin")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMIN

    Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMIN

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMAX

    Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umax")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMAX

    Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMAX

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_and")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_AND

    Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_AND

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_or")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_OR

    Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_OR

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_xor")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_XOR

    Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_XOR

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA;
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_inc")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_INC

    Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_INC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
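
    /**
     * Illustrative sketch, not referenced by the simulator: flat_atomic_inc
     * is a wrapping increment rather than a plain add. DATA is the wrap
     * bound: the counter resets to zero once it reaches the bound
     * (unsigned compare). The helper name is hypothetical.
     */
    static inline VecElemU32
    exampleAtomicIncLane(VecElemU32 &mem, VecElemU32 bound)
    {
        VecElemU32 tmp = mem;
        mem = (tmp >= bound) ? 0 : tmp + 1; // wrap to 0 at the bound
        return tmp;                         // RETURN_DATA = tmp
    }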

    Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_dec")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_DEC

    Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_DEC

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare); RETURN_DATA = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU32 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU32>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU32 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU32*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
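
    /**
     * Illustrative sketch, not referenced by the simulator: flat_atomic_dec
     * mirrors the wrapping increment. The counter reloads DATA when it is
     * already zero or exceeds DATA, and decrements otherwise (unsigned
     * compare). The helper name is hypothetical.
     */
    static inline VecElemU32
    exampleAtomicDecLane(VecElemU32 &mem, VecElemU32 bound)
    {
        VecElemU32 tmp = mem;
        mem = (tmp == 0 || tmp > bound) ? bound : tmp - 1;
        return tmp; // RETURN_DATA = tmp
    }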

    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
    {
        setFlag(AtomicExch);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2

    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
    {
        setFlag(AtomicCAS);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2

    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2

    // tmp = MEM[ADDR];
    // src = DATA[0:1];
    // cmp = DATA[2:3];
    // MEM[ADDR] = (tmp == cmp) ? src : tmp;
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);
        ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);

        addr.read();
        data.read();
        cmp.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
                    = data[lane];
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = cmp[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
            gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
            /**
             * TODO: If you encounter this panic, just remove this panic
             * and restart the simulation. It should just work fine but
             * this is to warn user that this path is never tested although
             * all the necessary logic is implemented
             */
            panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
                     "Flats to private aperture not tested yet\n");
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
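
    /**
     * Illustrative sketch, not referenced by the simulator: the _x2 atomics
     * operate on 64-bit values held in VGPR pairs. For cmpswap_x2 the swap
     * source occupies DATA[0:1] and the compare value DATA[2:3], which is
     * why the cmp operand above is built at extData.DATA + 2. The helper
     * name is hypothetical.
     */
    static inline VecElemU64
    exampleCmpSwapX2Lane(VecElemU64 &mem, VecElemU64 src, VecElemU64 cmp)
    {
        VecElemU64 tmp = mem;           // tmp = MEM[ADDR]
        mem = (tmp == cmp) ? src : tmp; // 64-bit compare-and-swap
        return tmp;                     // RETURN_DATA[0:1] = tmp
    }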

    Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_add_x2")
    {
        setFlag(AtomicAdd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_ADD_X2

    Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] += DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_sub_x2")
    {
        setFlag(AtomicSub);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SUB_X2

    Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] -= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2

    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
    {
        setFlag(AtomicMin);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2

    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2

    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
    {
        setFlag(AtomicMax);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2

    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_and_x2")
    {
        setFlag(AtomicAnd);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_AND_X2

    Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] &= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_or_x2")
    {
        setFlag(AtomicOr);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_OR_X2

    Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] |= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_xor_x2")
    {
        setFlag(AtomicXor);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_XOR_X2

    Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] ^= DATA[0:1];
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        panicUnimplemented();
    }

    Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_inc_x2")
    {
        setFlag(AtomicInc);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_INC_X2

    Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }

    Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2(
        InFmt_FLAT *iFmt)
        : Inst_FLAT(iFmt, "flat_atomic_dec_x2")
    {
        setFlag(AtomicDec);
        if (instData.GLC) {
            setFlag(AtomicReturn);
        } else {
            setFlag(AtomicNoReturn);
        }
        setFlag(MemoryRef);
    } // Inst_FLAT__FLAT_ATOMIC_DEC_X2

    Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2()
    {
    } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2

    // tmp = MEM[ADDR];
    // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
    // (unsigned compare);
    // RETURN_DATA[0:1] = tmp.
    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
    {
        Wavefront *wf = gpuDynInst->wavefront();

        if (wf->execMask().none()) {
            wf->decVMemInstsIssued();
            wf->decLGKMInstsIssued();
            wf->wrGmReqsInPipe--;
            wf->rdGmReqsInPipe--;
            wf->wrLmReqsInPipe--;
            wf->rdLmReqsInPipe--;
            if (instData.GLC) {
                gpuDynInst->exec_mask = wf->execMask();
                wf->computeUnit->vrf[wf->simdId]->
                    scheduleWriteOperandsFromLoad(wf, gpuDynInst);
            }
            return;
        }

        gpuDynInst->execUnitId = wf->execUnitId;
        gpuDynInst->exec_mask = wf->execMask();
        gpuDynInst->latency.init(gpuDynInst->computeUnit());
        gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());

        ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
        ConstVecOperandU64 data(gpuDynInst, extData.DATA);

        addr.read();
        data.read();

        calcAddr(gpuDynInst, addr);

        for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
            if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
                    = data[lane];
            }
        }

        if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
            gpuDynInst->computeUnit()->globalMemoryPipe.
                issueRequest(gpuDynInst);
            wf->wrGmReqsInPipe--;
            wf->outstandingReqsWrGm++;
            wf->rdGmReqsInPipe--;
            wf->outstandingReqsRdGm++;
        } else {
            fatal("Non global flat instructions not implemented yet.\n");
        }

        gpuDynInst->wavefront()->outstandingReqs++;
        gpuDynInst->wavefront()->validateRequestCounters();
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        initAtomicAccess<VecElemU64>(gpuDynInst);
    }

    void
    Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
    {
        if (isAtomicRet()) {
            VecOperandU64 vdst(gpuDynInst, extData.VDST);

            for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                if (gpuDynInst->exec_mask[lane]) {
                    vdst[lane] = (reinterpret_cast<VecElemU64*>(
                        gpuDynInst->d_data))[lane];
                }
            }

            vdst.write();
        }
    }
} // namespace Gcn3ISA