arch-gcn3: make read2st64_b32 write proper registers
src/arch/gcn3/insts/instructions.cc (gem5.git)
1 /*
2 * Copyright (c) 2015-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * For use for simulation and test purposes only
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright notice,
11 * this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright notice,
14 * this list of conditions and the following disclaimer in the documentation
15 * and/or other materials provided with the distribution.
16 *
17 * 3. Neither the name of the copyright holder nor the names of its
18 * contributors may be used to endorse or promote products derived from this
19 * software without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 *
33 * Authors: Anthony Gutierrez
34 */
35
36 #include "arch/gcn3/insts/instructions.hh"
37
38 #include <cmath>
39
40 #include "arch/gcn3/insts/inst_util.hh"
41 #include "debug/GCN3.hh"
42 #include "debug/GPUSync.hh"
43 #include "gpu-compute/shader.hh"
44
45 namespace Gcn3ISA
46 {
47
48 Inst_SOP2__S_ADD_U32::Inst_SOP2__S_ADD_U32(InFmt_SOP2 *iFmt)
49 : Inst_SOP2(iFmt, "s_add_u32")
50 {
51 setFlag(ALU);
52 } // Inst_SOP2__S_ADD_U32
53
54 Inst_SOP2__S_ADD_U32::~Inst_SOP2__S_ADD_U32()
55 {
56 } // ~Inst_SOP2__S_ADD_U32
57
58 // D.u = S0.u + S1.u;
59 // SCC = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an unsigned
60 // overflow/carry-out.
61 void
62 Inst_SOP2__S_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
63 {
64 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
65 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
66 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
67 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
68
69 src0.read();
70 src1.read();
71
72 sdst = src0.rawData() + src1.rawData();
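// Widening both operands to 64 bits makes the carry-out visible, e.g.,
// 0xffffffff + 0x1 = 0x100000000, so SCC = 1.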
73 scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData())
74 >= 0x100000000ULL ? 1 : 0;
75
76 sdst.write();
77 scc.write();
78 }
79
80 Inst_SOP2__S_SUB_U32::Inst_SOP2__S_SUB_U32(InFmt_SOP2 *iFmt)
81 : Inst_SOP2(iFmt, "s_sub_u32")
82 {
83 setFlag(ALU);
84 } // Inst_SOP2__S_SUB_U32
85
86 Inst_SOP2__S_SUB_U32::~Inst_SOP2__S_SUB_U32()
87 {
88 } // ~Inst_SOP2__S_SUB_U32
89
90 // D.u = S0.u - S1.u;
91 // SCC = (S1.u > S0.u ? 1 : 0) is an unsigned overflow or carry-out.
92 void
93 Inst_SOP2__S_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
94 {
95 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
96 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
97 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
98 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
99
100 src0.read();
101 src1.read();
102
103 sdst = src0.rawData() - src1.rawData();
104 scc = (src1.rawData() > src0.rawData()) ? 1 : 0;
105
106 sdst.write();
107 scc.write();
108 }
109
110 Inst_SOP2__S_ADD_I32::Inst_SOP2__S_ADD_I32(InFmt_SOP2 *iFmt)
111 : Inst_SOP2(iFmt, "s_add_i32")
112 {
113 setFlag(ALU);
114 } // Inst_SOP2__S_ADD_I32
115
116 Inst_SOP2__S_ADD_I32::~Inst_SOP2__S_ADD_I32()
117 {
118 } // ~Inst_SOP2__S_ADD_I32
119
120 // D.i = S0.i + S1.i;
121 // SCC = (S0.u[31] == S1.u[31] && S0.u[31] != D.u[31]) is a signed
122 // overflow.
123 void
124 Inst_SOP2__S_ADD_I32::execute(GPUDynInstPtr gpuDynInst)
125 {
126 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
127 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
128 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
129 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
130
131 src0.read();
132 src1.read();
133
134 sdst = src0.rawData() + src1.rawData();
135 scc = (bits(src0.rawData(), 31) == bits(src1.rawData(), 31)
136 && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31))
137 ? 1 : 0;
138
139 sdst.write();
140 scc.write();
141 }
142
143 Inst_SOP2__S_SUB_I32::Inst_SOP2__S_SUB_I32(InFmt_SOP2 *iFmt)
144 : Inst_SOP2(iFmt, "s_sub_i32")
145 {
146 setFlag(ALU);
147 } // Inst_SOP2__S_SUB_I32
148
149 Inst_SOP2__S_SUB_I32::~Inst_SOP2__S_SUB_I32()
150 {
151 } // ~Inst_SOP2__S_SUB_I32
152
153 // D.i = S0.i - S1.i;
154 // SCC = (S0.u[31] != S1.u[31] && S0.u[31] != D.u[31]) is a signed
155 // overflow.
156 void
157 Inst_SOP2__S_SUB_I32::execute(GPUDynInstPtr gpuDynInst)
158 {
159 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
160 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
161 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
162 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
163
164 src0.read();
165 src1.read();
166
167 sdst = src0.rawData() - src1.rawData();
168 scc = (bits(src0.rawData(), 31) != bits(src1.rawData(), 31)
169 && bits(src0.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
170
171 sdst.write();
172 scc.write();
173 }
174
175 Inst_SOP2__S_ADDC_U32::Inst_SOP2__S_ADDC_U32(InFmt_SOP2 *iFmt)
176 : Inst_SOP2(iFmt, "s_addc_u32")
177 {
178 setFlag(ALU);
179 } // Inst_SOP2__S_ADDC_U32
180
181 Inst_SOP2__S_ADDC_U32::~Inst_SOP2__S_ADDC_U32()
182 {
183 } // ~Inst_SOP2__S_ADDC_U32
184
185 // D.u = S0.u + S1.u + SCC;
186 // SCC = (S0.u + S1.u + SCC >= 0x100000000ULL ? 1 : 0) is an unsigned
187 // overflow.
188 void
189 Inst_SOP2__S_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
190 {
191 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
192 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
193 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
194 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
195
196 src0.read();
197 src1.read();
198 scc.read();
199
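// SCC serves as the carry-in here; paired with s_add_u32 (which sets SCC
// to the carry-out) this implements a 64-bit add.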
200 sdst = src0.rawData() + src1.rawData() + scc.rawData();
201 scc = ((ScalarRegU64)src0.rawData() + (ScalarRegU64)src1.rawData()
202 + (ScalarRegU64)scc.rawData()) >= 0x100000000ULL ? 1 : 0;
203
204 sdst.write();
205 scc.write();
206 }
207
208 Inst_SOP2__S_SUBB_U32::Inst_SOP2__S_SUBB_U32(InFmt_SOP2 *iFmt)
209 : Inst_SOP2(iFmt, "s_subb_u32")
210 {
211 setFlag(ALU);
212 } // Inst_SOP2__S_SUBB_U32
213
214 Inst_SOP2__S_SUBB_U32::~Inst_SOP2__S_SUBB_U32()
215 {
216 } // ~Inst_SOP2__S_SUBB_U32
217
218 // D.u = S0.u - S1.u - SCC;
219 // SCC = (S1.u + SCC > S0.u ? 1 : 0) is an unsigned overflow.
220 void
221 Inst_SOP2__S_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
222 {
223 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
224 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
225 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
226 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
227
228 src0.read();
229 src1.read();
230 scc.read();
231
232 sdst = src0.rawData() - src1.rawData() - scc.rawData();
233 scc = (src1.rawData() + scc.rawData()) > src0.rawData() ? 1 : 0;
234
235 sdst.write();
236 scc.write();
237 }
238
239 Inst_SOP2__S_MIN_I32::Inst_SOP2__S_MIN_I32(InFmt_SOP2 *iFmt)
240 : Inst_SOP2(iFmt, "s_min_i32")
241 {
242 setFlag(ALU);
243 } // Inst_SOP2__S_MIN_I32
244
245 Inst_SOP2__S_MIN_I32::~Inst_SOP2__S_MIN_I32()
246 {
247 } // ~Inst_SOP2__S_MIN_I32
248
249 // D.i = (S0.i < S1.i) ? S0.i : S1.i;
250 // SCC = 1 if S0 is chosen as the minimum value.
251 void
252 Inst_SOP2__S_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
253 {
254 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
255 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
256 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
257 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
258
259 src0.read();
260 src1.read();
261
262 sdst = std::min(src0.rawData(), src1.rawData());
263 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
264
265 sdst.write();
266 scc.write();
267 }
268
269 Inst_SOP2__S_MIN_U32::Inst_SOP2__S_MIN_U32(InFmt_SOP2 *iFmt)
270 : Inst_SOP2(iFmt, "s_min_u32")
271 {
272 setFlag(ALU);
273 } // Inst_SOP2__S_MIN_U32
274
275 Inst_SOP2__S_MIN_U32::~Inst_SOP2__S_MIN_U32()
276 {
277 } // ~Inst_SOP2__S_MIN_U32
278
279 // D.u = (S0.u < S1.u) ? S0.u : S1.u;
280 // SCC = 1 if S0 is chosen as the minimum value.
281 void
282 Inst_SOP2__S_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
283 {
284 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
285 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
286 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
287 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
288
289 src0.read();
290 src1.read();
291
292 sdst = std::min(src0.rawData(), src1.rawData());
293 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
294
295 sdst.write();
296 scc.write();
297 }
298
299 Inst_SOP2__S_MAX_I32::Inst_SOP2__S_MAX_I32(InFmt_SOP2 *iFmt)
300 : Inst_SOP2(iFmt, "s_max_i32")
301 {
302 setFlag(ALU);
303 } // Inst_SOP2__S_MAX_I32
304
305 Inst_SOP2__S_MAX_I32::~Inst_SOP2__S_MAX_I32()
306 {
307 } // ~Inst_SOP2__S_MAX_I32
308
309 // D.i = (S0.i > S1.i) ? S0.i : S1.i;
310 // SCC = 1 if S0 is chosen as the maximum value.
311 void
312 Inst_SOP2__S_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
313 {
314 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
315 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
316 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
317 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
318
319 src0.read();
320 src1.read();
321
322 sdst = std::max(src0.rawData(), src1.rawData());
323 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
324
325 sdst.write();
326 scc.write();
327 }
328
329 Inst_SOP2__S_MAX_U32::Inst_SOP2__S_MAX_U32(InFmt_SOP2 *iFmt)
330 : Inst_SOP2(iFmt, "s_max_u32")
331 {
332 setFlag(ALU);
333 } // Inst_SOP2__S_MAX_U32
334
335 Inst_SOP2__S_MAX_U32::~Inst_SOP2__S_MAX_U32()
336 {
337 } // ~Inst_SOP2__S_MAX_U32
338
339 // D.u = (S0.u > S1.u) ? S0.u : S1.u;
340 // SCC = 1 if S0 is chosen as the maximum value.
341 void
342 Inst_SOP2__S_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
343 {
344 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
345 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
346 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
347 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
348
349 src0.read();
350 src1.read();
351
352 sdst = std::max(src0.rawData(), src1.rawData());
353 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
354
355 sdst.write();
356 scc.write();
357 }
358
359 Inst_SOP2__S_CSELECT_B32::Inst_SOP2__S_CSELECT_B32(InFmt_SOP2 *iFmt)
360 : Inst_SOP2(iFmt, "s_cselect_b32")
361 {
362 setFlag(ALU);
363 } // Inst_SOP2__S_CSELECT_B32
364
365 Inst_SOP2__S_CSELECT_B32::~Inst_SOP2__S_CSELECT_B32()
366 {
367 } // ~Inst_SOP2__S_CSELECT_B32
368
369 // D.u = SCC ? S0.u : S1.u (conditional select).
370 void
371 Inst_SOP2__S_CSELECT_B32::execute(GPUDynInstPtr gpuDynInst)
372 {
373 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
374 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
375 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
376 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
377
378 src0.read();
379 src1.read();
380 scc.read();
381
382 sdst = scc.rawData() ? src0.rawData() : src1.rawData();
383
384 sdst.write();
385 }
386
387 Inst_SOP2__S_CSELECT_B64::Inst_SOP2__S_CSELECT_B64(InFmt_SOP2 *iFmt)
388 : Inst_SOP2(iFmt, "s_cselect_b64")
389 {
390 setFlag(ALU);
391 } // Inst_SOP2__S_CSELECT_B64
392
393 Inst_SOP2__S_CSELECT_B64::~Inst_SOP2__S_CSELECT_B64()
394 {
395 } // ~Inst_SOP2__S_CSELECT_B64
396
397 // D.u64 = SCC ? S0.u64 : S1.u64 (conditional select).
398 void
399 Inst_SOP2__S_CSELECT_B64::execute(GPUDynInstPtr gpuDynInst)
400 {
401 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
402 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
403 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
404 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
405
406 src0.read();
407 src1.read();
408 scc.read();
409
410 sdst = scc.rawData() ? src0.rawData() : src1.rawData();
411
412 sdst.write();
413 }
414
415 Inst_SOP2__S_AND_B32::Inst_SOP2__S_AND_B32(InFmt_SOP2 *iFmt)
416 : Inst_SOP2(iFmt, "s_and_b32")
417 {
418 setFlag(ALU);
419 } // Inst_SOP2__S_AND_B32
420
421 Inst_SOP2__S_AND_B32::~Inst_SOP2__S_AND_B32()
422 {
423 } // ~Inst_SOP2__S_AND_B32
424
425 // D.u = S0.u & S1.u;
426 // SCC = 1 if result is non-zero.
427 void
428 Inst_SOP2__S_AND_B32::execute(GPUDynInstPtr gpuDynInst)
429 {
430 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
431 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
432 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
433 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
434
435 src0.read();
436 src1.read();
437
438 sdst = src0.rawData() & src1.rawData();
439 scc = sdst.rawData() ? 1 : 0;
440
441 sdst.write();
442 scc.write();
443 }
444
445 Inst_SOP2__S_AND_B64::Inst_SOP2__S_AND_B64(InFmt_SOP2 *iFmt)
446 : Inst_SOP2(iFmt, "s_and_b64")
447 {
448 setFlag(ALU);
449 } // Inst_SOP2__S_AND_B64
450
451 Inst_SOP2__S_AND_B64::~Inst_SOP2__S_AND_B64()
452 {
453 } // ~Inst_SOP2__S_AND_B64
454
455 // D.u64 = S0.u64 & S1.u64;
456 // SCC = 1 if result is non-zero.
457 void
458 Inst_SOP2__S_AND_B64::execute(GPUDynInstPtr gpuDynInst)
459 {
460 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
461 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
462 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
463 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
464
465 src0.read();
466 src1.read();
467
468 sdst = src0.rawData() & src1.rawData();
469 scc = sdst.rawData() ? 1 : 0;
470
471 sdst.write();
472 scc.write();
473 }
474
475 Inst_SOP2__S_OR_B32::Inst_SOP2__S_OR_B32(InFmt_SOP2 *iFmt)
476 : Inst_SOP2(iFmt, "s_or_b32")
477 {
478 setFlag(ALU);
479 } // Inst_SOP2__S_OR_B32
480
481 Inst_SOP2__S_OR_B32::~Inst_SOP2__S_OR_B32()
482 {
483 } // ~Inst_SOP2__S_OR_B32
484
485 // D.u = S0.u | S1.u;
486 // SCC = 1 if result is non-zero.
487 void
488 Inst_SOP2__S_OR_B32::execute(GPUDynInstPtr gpuDynInst)
489 {
490 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
491 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
492 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
493 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
494
495 src0.read();
496 src1.read();
497
498 sdst = src0.rawData() | src1.rawData();
499 scc = sdst.rawData() ? 1 : 0;
500
501 sdst.write();
502 scc.write();
503 }
504
505 Inst_SOP2__S_OR_B64::Inst_SOP2__S_OR_B64(InFmt_SOP2 *iFmt)
506 : Inst_SOP2(iFmt, "s_or_b64")
507 {
508 setFlag(ALU);
509 } // Inst_SOP2__S_OR_B64
510
511 Inst_SOP2__S_OR_B64::~Inst_SOP2__S_OR_B64()
512 {
513 } // ~Inst_SOP2__S_OR_B64
514
515 // D.u64 = S0.u64 | S1.u64;
516 // SCC = 1 if result is non-zero.
517 void
518 Inst_SOP2__S_OR_B64::execute(GPUDynInstPtr gpuDynInst)
519 {
520 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
521 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
522 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
523 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
524
525 src0.read();
526 src1.read();
527
528 sdst = src0.rawData() | src1.rawData();
529 scc = sdst.rawData() ? 1 : 0;
530
531 sdst.write();
532 scc.write();
533 }
534
535 Inst_SOP2__S_XOR_B32::Inst_SOP2__S_XOR_B32(InFmt_SOP2 *iFmt)
536 : Inst_SOP2(iFmt, "s_xor_b32")
537 {
538 setFlag(ALU);
539 } // Inst_SOP2__S_XOR_B32
540
541 Inst_SOP2__S_XOR_B32::~Inst_SOP2__S_XOR_B32()
542 {
543 } // ~Inst_SOP2__S_XOR_B32
544
545 // D.u = S0.u ^ S1.u;
546 // SCC = 1 if result is non-zero.
547 void
548 Inst_SOP2__S_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
549 {
550 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
551 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
552 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
553 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
554
555 src0.read();
556 src1.read();
557
558 sdst = src0.rawData() ^ src1.rawData();
559 scc = sdst.rawData() ? 1 : 0;
560
561 sdst.write();
562 scc.write();
563 }
564
565 Inst_SOP2__S_XOR_B64::Inst_SOP2__S_XOR_B64(InFmt_SOP2 *iFmt)
566 : Inst_SOP2(iFmt, "s_xor_b64")
567 {
568 setFlag(ALU);
569 } // Inst_SOP2__S_XOR_B64
570
571 Inst_SOP2__S_XOR_B64::~Inst_SOP2__S_XOR_B64()
572 {
573 } // ~Inst_SOP2__S_XOR_B64
574
575 // D.u64 = S0.u64 ^ S1.u64;
576 // SCC = 1 if result is non-zero.
577 void
578 Inst_SOP2__S_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
579 {
580 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
581 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
582 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
583 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
584
585 src0.read();
586 src1.read();
587
588 sdst = src0.rawData() ^ src1.rawData();
589 scc = sdst.rawData() ? 1 : 0;
590
591 sdst.write();
592 scc.write();
593 }
594
595 Inst_SOP2__S_ANDN2_B32::Inst_SOP2__S_ANDN2_B32(InFmt_SOP2 *iFmt)
596 : Inst_SOP2(iFmt, "s_andn2_b32")
597 {
598 setFlag(ALU);
599 } // Inst_SOP2__S_ANDN2_B32
600
601 Inst_SOP2__S_ANDN2_B32::~Inst_SOP2__S_ANDN2_B32()
602 {
603 } // ~Inst_SOP2__S_ANDN2_B32
604
605 // D.u = S0.u & ~S1.u;
606 // SCC = 1 if result is non-zero.
607 void
608 Inst_SOP2__S_ANDN2_B32::execute(GPUDynInstPtr gpuDynInst)
609 {
610 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
611 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
612 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
613 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
614
615 src0.read();
616 src1.read();
617
618 sdst = src0.rawData() &~ src1.rawData();
619 scc = sdst.rawData() ? 1 : 0;
620
621 sdst.write();
622 scc.write();
623 }
624
625 Inst_SOP2__S_ANDN2_B64::Inst_SOP2__S_ANDN2_B64(InFmt_SOP2 *iFmt)
626 : Inst_SOP2(iFmt, "s_andn2_b64")
627 {
628 setFlag(ALU);
629 } // Inst_SOP2__S_ANDN2_B64
630
631 Inst_SOP2__S_ANDN2_B64::~Inst_SOP2__S_ANDN2_B64()
632 {
633 } // ~Inst_SOP2__S_ANDN2_B64
634
635 // D.u64 = S0.u64 & ~S1.u64;
636 // SCC = 1 if result is non-zero.
637 void
638 Inst_SOP2__S_ANDN2_B64::execute(GPUDynInstPtr gpuDynInst)
639 {
640 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
641 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
642 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
643 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
644
645 src0.read();
646 src1.read();
647
648 sdst = src0.rawData() &~ src1.rawData();
649 scc = sdst.rawData() ? 1 : 0;
650
651 sdst.write();
652 scc.write();
653 }
654
655 Inst_SOP2__S_ORN2_B32::Inst_SOP2__S_ORN2_B32(InFmt_SOP2 *iFmt)
656 : Inst_SOP2(iFmt, "s_orn2_b32")
657 {
658 setFlag(ALU);
659 } // Inst_SOP2__S_ORN2_B32
660
661 Inst_SOP2__S_ORN2_B32::~Inst_SOP2__S_ORN2_B32()
662 {
663 } // ~Inst_SOP2__S_ORN2_B32
664
665 // D.u = S0.u | ~S1.u;
666 // SCC = 1 if result is non-zero.
667 void
668 Inst_SOP2__S_ORN2_B32::execute(GPUDynInstPtr gpuDynInst)
669 {
670 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
671 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
672 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
673 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
674
675 src0.read();
676 src1.read();
677
678 sdst = src0.rawData() |~ src1.rawData();
679 scc = sdst.rawData() ? 1 : 0;
680
681 sdst.write();
682 scc.write();
683 }
684
685 Inst_SOP2__S_ORN2_B64::Inst_SOP2__S_ORN2_B64(InFmt_SOP2 *iFmt)
686 : Inst_SOP2(iFmt, "s_orn2_b64")
687 {
688 setFlag(ALU);
689 } // Inst_SOP2__S_ORN2_B64
690
691 Inst_SOP2__S_ORN2_B64::~Inst_SOP2__S_ORN2_B64()
692 {
693 } // ~Inst_SOP2__S_ORN2_B64
694
695 // D.u64 = S0.u64 | ~S1.u64;
696 // SCC = 1 if result is non-zero.
697 void
698 Inst_SOP2__S_ORN2_B64::execute(GPUDynInstPtr gpuDynInst)
699 {
700 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
701 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
702 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
703 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
704
705 src0.read();
706 src1.read();
707
708 sdst = src0.rawData() |~ src1.rawData();
709 scc = sdst.rawData() ? 1 : 0;
710
711 sdst.write();
712 scc.write();
713 }
714
715 Inst_SOP2__S_NAND_B32::Inst_SOP2__S_NAND_B32(InFmt_SOP2 *iFmt)
716 : Inst_SOP2(iFmt, "s_nand_b32")
717 {
718 setFlag(ALU);
719 } // Inst_SOP2__S_NAND_B32
720
721 Inst_SOP2__S_NAND_B32::~Inst_SOP2__S_NAND_B32()
722 {
723 } // ~Inst_SOP2__S_NAND_B32
724
725 // D.u = ~(S0.u & S1.u);
726 // SCC = 1 if result is non-zero.
727 void
728 Inst_SOP2__S_NAND_B32::execute(GPUDynInstPtr gpuDynInst)
729 {
730 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
731 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
732 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
733 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
734
735 src0.read();
736 src1.read();
737
738 sdst = ~(src0.rawData() & src1.rawData());
739 scc = sdst.rawData() ? 1 : 0;
740
741 sdst.write();
742 scc.write();
743 }
744
745 Inst_SOP2__S_NAND_B64::Inst_SOP2__S_NAND_B64(InFmt_SOP2 *iFmt)
746 : Inst_SOP2(iFmt, "s_nand_b64")
747 {
748 setFlag(ALU);
749 } // Inst_SOP2__S_NAND_B64
750
751 Inst_SOP2__S_NAND_B64::~Inst_SOP2__S_NAND_B64()
752 {
753 } // ~Inst_SOP2__S_NAND_B64
754
755 // D.u64 = ~(S0.u64 & S1.u64);
756 // SCC = 1 if result is non-zero.
757 void
758 Inst_SOP2__S_NAND_B64::execute(GPUDynInstPtr gpuDynInst)
759 {
760 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
761 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
762 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
763 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
764
765 src0.read();
766 src1.read();
767
768 sdst = ~(src0.rawData() & src1.rawData());
769 scc = sdst.rawData() ? 1 : 0;
770
771 sdst.write();
772 scc.write();
773 }
774
775 Inst_SOP2__S_NOR_B32::Inst_SOP2__S_NOR_B32(InFmt_SOP2 *iFmt)
776 : Inst_SOP2(iFmt, "s_nor_b32")
777 {
778 setFlag(ALU);
779 } // Inst_SOP2__S_NOR_B32
780
781 Inst_SOP2__S_NOR_B32::~Inst_SOP2__S_NOR_B32()
782 {
783 } // ~Inst_SOP2__S_NOR_B32
784
785 // D.u = ~(S0.u | S1.u);
786 // SCC = 1 if result is non-zero.
787 void
788 Inst_SOP2__S_NOR_B32::execute(GPUDynInstPtr gpuDynInst)
789 {
790 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
791 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
792 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
793 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
794
795 src0.read();
796 src1.read();
797
798 sdst = ~(src0.rawData() | src1.rawData());
799 scc = sdst.rawData() ? 1 : 0;
800
801 sdst.write();
802 scc.write();
803 }
804
805 Inst_SOP2__S_NOR_B64::Inst_SOP2__S_NOR_B64(InFmt_SOP2 *iFmt)
806 : Inst_SOP2(iFmt, "s_nor_b64")
807 {
808 setFlag(ALU);
809 } // Inst_SOP2__S_NOR_B64
810
811 Inst_SOP2__S_NOR_B64::~Inst_SOP2__S_NOR_B64()
812 {
813 } // ~Inst_SOP2__S_NOR_B64
814
815 // D.u64 = ~(S0.u64 | S1.u64);
816 // SCC = 1 if result is non-zero.
817 void
818 Inst_SOP2__S_NOR_B64::execute(GPUDynInstPtr gpuDynInst)
819 {
820 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
821 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
822 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
823 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
824
825 src0.read();
826 src1.read();
827
828 sdst = ~(src0.rawData() | src1.rawData());
829 scc = sdst.rawData() ? 1 : 0;
830
831 sdst.write();
832 scc.write();
833 }
834
835 Inst_SOP2__S_XNOR_B32::Inst_SOP2__S_XNOR_B32(InFmt_SOP2 *iFmt)
836 : Inst_SOP2(iFmt, "s_xnor_b32")
837 {
838 setFlag(ALU);
839 } // Inst_SOP2__S_XNOR_B32
840
841 Inst_SOP2__S_XNOR_B32::~Inst_SOP2__S_XNOR_B32()
842 {
843 } // ~Inst_SOP2__S_XNOR_B32
844
845 // D.u = ~(S0.u ^ S1.u);
846 // SCC = 1 if result is non-zero.
847 void
848 Inst_SOP2__S_XNOR_B32::execute(GPUDynInstPtr gpuDynInst)
849 {
850 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
851 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
852 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
853 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
854
855 src0.read();
856 src1.read();
857
858 sdst = ~(src0.rawData() ^ src1.rawData());
859 scc = sdst.rawData() ? 1 : 0;
860
861 sdst.write();
862 scc.write();
863 }
864
865 Inst_SOP2__S_XNOR_B64::Inst_SOP2__S_XNOR_B64(InFmt_SOP2 *iFmt)
866 : Inst_SOP2(iFmt, "s_xnor_b64")
867 {
868 setFlag(ALU);
869 } // Inst_SOP2__S_XNOR_B64
870
871 Inst_SOP2__S_XNOR_B64::~Inst_SOP2__S_XNOR_B64()
872 {
873 } // ~Inst_SOP2__S_XNOR_B64
874
875 // D.u64 = ~(S0.u64 ^ S1.u64);
876 // SCC = 1 if result is non-zero.
877 void
878 Inst_SOP2__S_XNOR_B64::execute(GPUDynInstPtr gpuDynInst)
879 {
880 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
881 ConstScalarOperandU64 src1(gpuDynInst, instData.SSRC1);
882 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
883 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
884
885 src0.read();
886 src1.read();
887
888 sdst = ~(src0.rawData() ^ src1.rawData());
889 scc = sdst.rawData() ? 1 : 0;
890
891 sdst.write();
892 scc.write();
893 }
894
895 Inst_SOP2__S_LSHL_B32::Inst_SOP2__S_LSHL_B32(InFmt_SOP2 *iFmt)
896 : Inst_SOP2(iFmt, "s_lshl_b32")
897 {
898 setFlag(ALU);
899 } // Inst_SOP2__S_LSHL_B32
900
901 Inst_SOP2__S_LSHL_B32::~Inst_SOP2__S_LSHL_B32()
902 {
903 } // ~Inst_SOP2__S_LSHL_B32
904
905 // D.u = S0.u << S1.u[4:0];
906 // SCC = 1 if result is non-zero.
907 void
908 Inst_SOP2__S_LSHL_B32::execute(GPUDynInstPtr gpuDynInst)
909 {
910 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
911 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
912 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
913 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
914
915 src0.read();
916 src1.read();
917
918 sdst = (src0.rawData() << bits(src1.rawData(), 4, 0));
919 scc = sdst.rawData() ? 1 : 0;
920
921 sdst.write();
922 scc.write();
923 }
924
925 Inst_SOP2__S_LSHL_B64::Inst_SOP2__S_LSHL_B64(InFmt_SOP2 *iFmt)
926 : Inst_SOP2(iFmt, "s_lshl_b64")
927 {
928 setFlag(ALU);
929 } // Inst_SOP2__S_LSHL_B64
930
931 Inst_SOP2__S_LSHL_B64::~Inst_SOP2__S_LSHL_B64()
932 {
933 } // ~Inst_SOP2__S_LSHL_B64
934
935 // D.u64 = S0.u64 << S1.u[5:0];
936 // SCC = 1 if result is non-zero.
937 void
938 Inst_SOP2__S_LSHL_B64::execute(GPUDynInstPtr gpuDynInst)
939 {
940 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
941 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
942 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
943 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
944
945 src0.read();
946 src1.read();
947
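// Only S1[5:0] selects the shift amount, e.g., S1 = 65 shifts by 1.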
948 sdst = (src0.rawData() << bits(src1.rawData(), 5, 0));
949 scc = sdst.rawData() ? 1 : 0;
950
951 sdst.write();
952 scc.write();
953 }
954
955 Inst_SOP2__S_LSHR_B32::Inst_SOP2__S_LSHR_B32(InFmt_SOP2 *iFmt)
956 : Inst_SOP2(iFmt, "s_lshr_b32")
957 {
958 setFlag(ALU);
959 } // Inst_SOP2__S_LSHR_B32
960
961 Inst_SOP2__S_LSHR_B32::~Inst_SOP2__S_LSHR_B32()
962 {
963 } // ~Inst_SOP2__S_LSHR_B32
964
965 // D.u = S0.u >> S1.u[4:0];
966 // SCC = 1 if result is non-zero.
967 // The vacated bits are set to zero.
968 void
969 Inst_SOP2__S_LSHR_B32::execute(GPUDynInstPtr gpuDynInst)
970 {
971 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
972 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
973 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
974 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
975
976 src0.read();
977 src1.read();
978
979 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
980 scc = sdst.rawData() ? 1 : 0;
981
982 sdst.write();
983 scc.write();
984 }
985
986 Inst_SOP2__S_LSHR_B64::Inst_SOP2__S_LSHR_B64(InFmt_SOP2 *iFmt)
987 : Inst_SOP2(iFmt, "s_lshr_b64")
988 {
989 setFlag(ALU);
990 } // Inst_SOP2__S_LSHR_B64
991
992 Inst_SOP2__S_LSHR_B64::~Inst_SOP2__S_LSHR_B64()
993 {
994 } // ~Inst_SOP2__S_LSHR_B64
995
996 // D.u64 = S0.u64 >> S1.u[5:0];
997 // SCC = 1 if result is non-zero.
998 // The vacated bits are set to zero.
999 void
1000 Inst_SOP2__S_LSHR_B64::execute(GPUDynInstPtr gpuDynInst)
1001 {
1002 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
1003 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1004 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1005 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1006
1007 src0.read();
1008 src1.read();
1009
1010 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
1011 scc = sdst.rawData() ? 1 : 0;
1012
1013 sdst.write();
1014 scc.write();
1015 }
1016
1017 Inst_SOP2__S_ASHR_I32::Inst_SOP2__S_ASHR_I32(InFmt_SOP2 *iFmt)
1018 : Inst_SOP2(iFmt, "s_ashr_i32")
1019 {
1020 setFlag(ALU);
1021 } // Inst_SOP2__S_ASHR_I32
1022
1023 Inst_SOP2__S_ASHR_I32::~Inst_SOP2__S_ASHR_I32()
1024 {
1025 } // ~Inst_SOP2__S_ASHR_I32
1026
1027 // D.i = signext(S0.i) >> S1.u[4:0];
1028 // SCC = 1 if result is non-zero.
1029 // The vacated bits are set to the sign bit of the input value.
1030 void
1031 Inst_SOP2__S_ASHR_I32::execute(GPUDynInstPtr gpuDynInst)
1032 {
1033 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1034 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1035 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1036 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1037
1038 src0.read();
1039 src1.read();
1040
1041 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0));
1042 scc = sdst.rawData() ? 1 : 0;
1043
1044 sdst.write();
1045 scc.write();
1046 }
1047
1048 Inst_SOP2__S_ASHR_I64::Inst_SOP2__S_ASHR_I64(InFmt_SOP2 *iFmt)
1049 : Inst_SOP2(iFmt, "s_ashr_i64")
1050 {
1051 setFlag(ALU);
1052 } // Inst_SOP2__S_ASHR_I64
1053
1054 Inst_SOP2__S_ASHR_I64::~Inst_SOP2__S_ASHR_I64()
1055 {
1056 } // ~Inst_SOP2__S_ASHR_I64
1057
1058 // D.i64 = signext(S0.i64) >> S1.u[5:0];
1059 // SCC = 1 if result is non-zero.
1060 // The vacated bits are set to the sign bit of the input value.
1061 void
1062 Inst_SOP2__S_ASHR_I64::execute(GPUDynInstPtr gpuDynInst)
1063 {
1064 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
1065 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1066 ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
1067 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1068
1069 src0.read();
1070 src1.read();
1071
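// Assumes the host compiler implements >> on signed integers as an
// arithmetic shift, so the vacated bits take the sign bit.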
1072 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0));
1073 scc = sdst.rawData() ? 1 : 0;
1074
1075 sdst.write();
1076 scc.write();
1077 }
1078
1079 Inst_SOP2__S_BFM_B32::Inst_SOP2__S_BFM_B32(InFmt_SOP2 *iFmt)
1080 : Inst_SOP2(iFmt, "s_bfm_b32")
1081 {
1082 setFlag(ALU);
1083 } // Inst_SOP2__S_BFM_B32
1084
1085 Inst_SOP2__S_BFM_B32::~Inst_SOP2__S_BFM_B32()
1086 {
1087 } // ~Inst_SOP2__S_BFM_B32
1088
1089 // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0] (bitfield mask).
1090 void
1091 Inst_SOP2__S_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
1092 {
1093 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1094 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1095 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1096
1097 src0.read();
1098 src1.read();
1099
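// e.g., S0[4:0] = 3 and S1[4:0] = 4 give ((1 << 3) - 1) << 4 = 0x70.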
1100 sdst = ((1 << bits(src0.rawData(), 4, 0)) - 1)
1101 << bits(src1.rawData(), 4, 0);
1102
1103 sdst.write();
1104 }
1105
1106 Inst_SOP2__S_BFM_B64::Inst_SOP2__S_BFM_B64(InFmt_SOP2 *iFmt)
1107 : Inst_SOP2(iFmt, "s_bfm_b64")
1108 {
1109 setFlag(ALU);
1110 } // Inst_SOP2__S_BFM_B64
1111
1112 Inst_SOP2__S_BFM_B64::~Inst_SOP2__S_BFM_B64()
1113 {
1114 } // ~Inst_SOP2__S_BFM_B64
1115
1116 // D.u64 = ((1ULL << S0.u[5:0]) - 1) << S1.u[5:0] (bitfield mask).
1117 void
1118 Inst_SOP2__S_BFM_B64::execute(GPUDynInstPtr gpuDynInst)
1119 {
1120 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1121 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1122 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1123
1124 src0.read();
1125 src1.read();
1126
1127 sdst = ((1ULL << bits(src0.rawData(), 5, 0)) - 1)
1128 << bits(src1.rawData(), 5, 0);
1129
1130 sdst.write();
1131 }
1132
1133 Inst_SOP2__S_MUL_I32::Inst_SOP2__S_MUL_I32(InFmt_SOP2 *iFmt)
1134 : Inst_SOP2(iFmt, "s_mul_i32")
1135 {
1136 setFlag(ALU);
1137 } // Inst_SOP2__S_MUL_I32
1138
1139 Inst_SOP2__S_MUL_I32::~Inst_SOP2__S_MUL_I32()
1140 {
1141 } // ~Inst_SOP2__S_MUL_I32
1142
1143 // D.i = S0.i * S1.i.
1144 void
1145 Inst_SOP2__S_MUL_I32::execute(GPUDynInstPtr gpuDynInst)
1146 {
1147 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1148 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
1149 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1150
1151 src0.read();
1152 src1.read();
1153
1154 sdst = src0.rawData() * src1.rawData();
1155
1156 sdst.write();
1157 }
1158
1159 Inst_SOP2__S_BFE_U32::Inst_SOP2__S_BFE_U32(InFmt_SOP2 *iFmt)
1160 : Inst_SOP2(iFmt, "s_bfe_u32")
1161 {
1162 setFlag(ALU);
1163 } // Inst_SOP2__S_BFE_U32
1164
1165 Inst_SOP2__S_BFE_U32::~Inst_SOP2__S_BFE_U32()
1166 {
1167 } // ~Inst_SOP2__S_BFE_U32
1168
1169 // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
1170 // field width.
1171 // D.u = (S0.u >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
1172 // SCC = 1 if result is non-zero.
1173 void
1174 Inst_SOP2__S_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
1175 {
1176 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
1177 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1178 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1179 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1180
1181 src0.read();
1182 src1.read();
1183
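// e.g., S1 = 0x00040008 selects offset 8 and width 4, so D = S0[11:8].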
1184 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
1185 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1186 scc = sdst.rawData() ? 1 : 0;
1187
1188 sdst.write();
1189 scc.write();
1190 }
1191
1192 Inst_SOP2__S_BFE_I32::Inst_SOP2__S_BFE_I32(InFmt_SOP2 *iFmt)
1193 : Inst_SOP2(iFmt, "s_bfe_i32")
1194 {
1195 setFlag(ALU);
1196 } // Inst_SOP2__S_BFE_I32
1197
1198 Inst_SOP2__S_BFE_I32::~Inst_SOP2__S_BFE_I32()
1199 {
1200 } // ~Inst_SOP2__S_BFE_I32
1201
1202 // Bit field extract. S0 is Data, S1[4:0] is field offset, S1[22:16] is
1203 // field width.
1204 // D.i = (S0.i >> S1.u[4:0]) & ((1 << S1.u[22:16]) - 1);
1205 // Sign-extend the result;
1206 // SCC = 1 if result is non-zero.
1207 void
1208 Inst_SOP2__S_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
1209 {
1210 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1211 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1212 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1213 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1214
1215 src0.read();
1216 src1.read();
1217
1218 sdst = (src0.rawData() >> bits(src1.rawData(), 4, 0))
1219 & ((1 << bits(src1.rawData(), 22, 16)) - 1);
1220 scc = sdst.rawData() ? 1 : 0;
1221
1222 sdst.write();
1223 scc.write();
1224 }
1225
1226 Inst_SOP2__S_BFE_U64::Inst_SOP2__S_BFE_U64(InFmt_SOP2 *iFmt)
1227 : Inst_SOP2(iFmt, "s_bfe_u64")
1228 {
1229 setFlag(ALU);
1230 } // Inst_SOP2__S_BFE_U64
1231
1232 Inst_SOP2__S_BFE_U64::~Inst_SOP2__S_BFE_U64()
1233 {
1234 } // ~Inst_SOP2__S_BFE_U64
1235
1236 // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
1237 // field width.
1238 // D.u64 = (S0.u64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
1239 // SCC = 1 if result is non-zero.
1240 void
1241 Inst_SOP2__S_BFE_U64::execute(GPUDynInstPtr gpuDynInst)
1242 {
1243 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
1244 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1245 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1246 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1247
1248 src0.read();
1249 src1.read();
1250
1251 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
1252                 & ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
1253 scc = sdst.rawData() ? 1 : 0;
1254
1255 sdst.write();
1256 scc.write();
1257 }
1258
1259 Inst_SOP2__S_BFE_I64::Inst_SOP2__S_BFE_I64(InFmt_SOP2 *iFmt)
1260 : Inst_SOP2(iFmt, "s_bfe_i64")
1261 {
1262 setFlag(ALU);
1263 } // Inst_SOP2__S_BFE_I64
1264
1265 Inst_SOP2__S_BFE_I64::~Inst_SOP2__S_BFE_I64()
1266 {
1267 } // ~Inst_SOP2__S_BFE_I64
1268
1269 // Bit field extract. S0 is Data, S1[5:0] is field offset, S1[22:16] is
1270 // field width.
1271 // D.i64 = (S0.i64 >> S1.u[5:0]) & ((1 << S1.u[22:16]) - 1);
1272 // Sign-extend result;
1273 // SCC = 1 if result is non-zero.
1274 void
1275 Inst_SOP2__S_BFE_I64::execute(GPUDynInstPtr gpuDynInst)
1276 {
1277 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
1278 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
1279 ScalarOperandI64 sdst(gpuDynInst, instData.SDST);
1280 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1281
1282 src0.read();
1283 src1.read();
1284
1285 sdst = (src0.rawData() >> bits(src1.rawData(), 5, 0))
1286                 & ((1ULL << bits(src1.rawData(), 22, 16)) - 1);
1287 scc = sdst.rawData() ? 1 : 0;
1288
1289 sdst.write();
1290 scc.write();
1291 }
1292
1293 Inst_SOP2__S_CBRANCH_G_FORK::Inst_SOP2__S_CBRANCH_G_FORK(InFmt_SOP2 *iFmt)
1294 : Inst_SOP2(iFmt, "s_cbranch_g_fork")
1295 {
1296 setFlag(Branch);
1297 } // Inst_SOP2__S_CBRANCH_G_FORK
1298
1299 Inst_SOP2__S_CBRANCH_G_FORK::~Inst_SOP2__S_CBRANCH_G_FORK()
1300 {
1301 } // ~Inst_SOP2__S_CBRANCH_G_FORK
1302
1303 // Conditional branch using branch-stack.
1304     // S0 = compare mask (vcc or any sgpr) and
1305 // S1 = 64-bit byte address of target instruction.
1306 void
1307 Inst_SOP2__S_CBRANCH_G_FORK::execute(GPUDynInstPtr gpuDynInst)
1308 {
1309 panicUnimplemented();
1310 }
1311
1312 Inst_SOP2__S_ABSDIFF_I32::Inst_SOP2__S_ABSDIFF_I32(InFmt_SOP2 *iFmt)
1313 : Inst_SOP2(iFmt, "s_absdiff_i32")
1314 {
1315 setFlag(ALU);
1316 } // Inst_SOP2__S_ABSDIFF_I32
1317
1318 Inst_SOP2__S_ABSDIFF_I32::~Inst_SOP2__S_ABSDIFF_I32()
1319 {
1320 } // ~Inst_SOP2__S_ABSDIFF_I32
1321
1322 // D.i = S0.i - S1.i;
1323 // if (D.i < 0) then D.i = -D.i;
1324 // SCC = 1 if result is non-zero.
1325 // Compute the absolute value of difference between two values.
1326 void
1327 Inst_SOP2__S_ABSDIFF_I32::execute(GPUDynInstPtr gpuDynInst)
1328 {
1329 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
1330 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
1331 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1332             ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1333
             src0.read();
             src1.read();
1334             sdst = std::abs(src0.rawData() - src1.rawData());
1335 scc = sdst.rawData() ? 1 : 0;
1336
1337 sdst.write();
1338 scc.write();
1339 }
1340
1341 Inst_SOP2__S_RFE_RESTORE_B64::Inst_SOP2__S_RFE_RESTORE_B64(
1342 InFmt_SOP2 *iFmt)
1343 : Inst_SOP2(iFmt, "s_rfe_restore_b64")
1344 {
1345 } // Inst_SOP2__S_RFE_RESTORE_B64
1346
1347 Inst_SOP2__S_RFE_RESTORE_B64::~Inst_SOP2__S_RFE_RESTORE_B64()
1348 {
1349 } // ~Inst_SOP2__S_RFE_RESTORE_B64
1350
1351 // Return from exception handler and continue.
1352 void
1353 Inst_SOP2__S_RFE_RESTORE_B64::execute(GPUDynInstPtr gpuDynInst)
1354 {
1355 panicUnimplemented();
1356 }
1357
1358 Inst_SOPK__S_MOVK_I32::Inst_SOPK__S_MOVK_I32(InFmt_SOPK *iFmt)
1359 : Inst_SOPK(iFmt, "s_movk_i32")
1360 {
1361 setFlag(ALU);
1362 } // Inst_SOPK__S_MOVK_I32
1363
1364 Inst_SOPK__S_MOVK_I32::~Inst_SOPK__S_MOVK_I32()
1365 {
1366 } // ~Inst_SOPK__S_MOVK_I32
1367
1368 // D.i = signext(SIMM16) (sign extension).
1369 void
1370 Inst_SOPK__S_MOVK_I32::execute(GPUDynInstPtr gpuDynInst)
1371 {
1372 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1373 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1374
1375 sdst = simm16;
1376
1377 sdst.write();
1378 }
1379
1380 Inst_SOPK__S_CMOVK_I32::Inst_SOPK__S_CMOVK_I32(InFmt_SOPK *iFmt)
1381 : Inst_SOPK(iFmt, "s_cmovk_i32")
1382 {
1383 setFlag(ALU);
1384 } // Inst_SOPK__S_CMOVK_I32
1385
1386 Inst_SOPK__S_CMOVK_I32::~Inst_SOPK__S_CMOVK_I32()
1387 {
1388 } // ~Inst_SOPK__S_CMOVK_I32
1389
1390 // if (SCC) then D.i = signext(SIMM16);
1391 // else NOP.
1392 // Conditional move with sign extension.
1393 void
1394 Inst_SOPK__S_CMOVK_I32::execute(GPUDynInstPtr gpuDynInst)
1395 {
1396 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1397 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1398 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
1399
1400 scc.read();
1401
1402 if (scc.rawData()) {
1403 sdst = simm16;
1404 sdst.write();
1405 }
1406 }
1407
1408 Inst_SOPK__S_CMPK_EQ_I32::Inst_SOPK__S_CMPK_EQ_I32(InFmt_SOPK *iFmt)
1409 : Inst_SOPK(iFmt, "s_cmpk_eq_i32")
1410 {
1411 setFlag(ALU);
1412 } // Inst_SOPK__S_CMPK_EQ_I32
1413
1414 Inst_SOPK__S_CMPK_EQ_I32::~Inst_SOPK__S_CMPK_EQ_I32()
1415 {
1416 } // ~Inst_SOPK__S_CMPK_EQ_I32
1417
1418 // SCC = (S0.i == signext(SIMM16)).
1419 void
1420 Inst_SOPK__S_CMPK_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
1421 {
1422 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1423 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1424 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1425
1426 src.read();
1427
1428 scc = (src.rawData() == simm16) ? 1 : 0;
1429
1430 scc.write();
1431 }
1432
1433 Inst_SOPK__S_CMPK_LG_I32::Inst_SOPK__S_CMPK_LG_I32(InFmt_SOPK *iFmt)
1434 : Inst_SOPK(iFmt, "s_cmpk_lg_i32")
1435 {
1436 setFlag(ALU);
1437 } // Inst_SOPK__S_CMPK_LG_I32
1438
1439 Inst_SOPK__S_CMPK_LG_I32::~Inst_SOPK__S_CMPK_LG_I32()
1440 {
1441 } // ~Inst_SOPK__S_CMPK_LG_I32
1442
1443 // SCC = (S0.i != signext(SIMM16)).
1444 void
1445 Inst_SOPK__S_CMPK_LG_I32::execute(GPUDynInstPtr gpuDynInst)
1446 {
1447 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1448 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1449 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1450
1451 src.read();
1452
1453 scc = (src.rawData() != simm16) ? 1 : 0;
1454
1455 scc.write();
1456 }
1457
1458 Inst_SOPK__S_CMPK_GT_I32::Inst_SOPK__S_CMPK_GT_I32(InFmt_SOPK *iFmt)
1459 : Inst_SOPK(iFmt, "s_cmpk_gt_i32")
1460 {
1461 setFlag(ALU);
1462 } // Inst_SOPK__S_CMPK_GT_I32
1463
1464 Inst_SOPK__S_CMPK_GT_I32::~Inst_SOPK__S_CMPK_GT_I32()
1465 {
1466 } // ~Inst_SOPK__S_CMPK_GT_I32
1467
1468 // SCC = (S0.i > signext(SIMM16)).
1469 void
1470 Inst_SOPK__S_CMPK_GT_I32::execute(GPUDynInstPtr gpuDynInst)
1471 {
1472 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1473 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1474 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1475
1476 src.read();
1477
1478 scc = (src.rawData() > simm16) ? 1 : 0;
1479
1480 scc.write();
1481 }
1482
1483 Inst_SOPK__S_CMPK_GE_I32::Inst_SOPK__S_CMPK_GE_I32(InFmt_SOPK *iFmt)
1484 : Inst_SOPK(iFmt, "s_cmpk_ge_i32")
1485 {
1486 setFlag(ALU);
1487 } // Inst_SOPK__S_CMPK_GE_I32
1488
1489 Inst_SOPK__S_CMPK_GE_I32::~Inst_SOPK__S_CMPK_GE_I32()
1490 {
1491 } // ~Inst_SOPK__S_CMPK_GE_I32
1492
1493 // SCC = (S0.i >= signext(SIMM16)).
1494 void
1495 Inst_SOPK__S_CMPK_GE_I32::execute(GPUDynInstPtr gpuDynInst)
1496 {
1497 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1498 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1499 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1500
1501 src.read();
1502
1503 scc = (src.rawData() >= simm16) ? 1 : 0;
1504
1505 scc.write();
1506 }
1507
1508 Inst_SOPK__S_CMPK_LT_I32::Inst_SOPK__S_CMPK_LT_I32(InFmt_SOPK *iFmt)
1509 : Inst_SOPK(iFmt, "s_cmpk_lt_i32")
1510 {
1511 setFlag(ALU);
1512 } // Inst_SOPK__S_CMPK_LT_I32
1513
1514 Inst_SOPK__S_CMPK_LT_I32::~Inst_SOPK__S_CMPK_LT_I32()
1515 {
1516 } // ~Inst_SOPK__S_CMPK_LT_I32
1517
1518 // SCC = (S0.i < signext(SIMM16)).
1519 void
1520 Inst_SOPK__S_CMPK_LT_I32::execute(GPUDynInstPtr gpuDynInst)
1521 {
1522 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1523 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1524 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1525
1526 src.read();
1527
1528 scc = (src.rawData() < simm16) ? 1 : 0;
1529
1530 scc.write();
1531 }
1532
1533 Inst_SOPK__S_CMPK_LE_I32::Inst_SOPK__S_CMPK_LE_I32(InFmt_SOPK *iFmt)
1534 : Inst_SOPK(iFmt, "s_cmpk_le_i32")
1535 {
1536 setFlag(ALU);
1537 } // Inst_SOPK__S_CMPK_LE_I32
1538
1539 Inst_SOPK__S_CMPK_LE_I32::~Inst_SOPK__S_CMPK_LE_I32()
1540 {
1541 } // ~Inst_SOPK__S_CMPK_LE_I32
1542
1543 // SCC = (S0.i <= signext(SIMM16)).
1544 void
1545 Inst_SOPK__S_CMPK_LE_I32::execute(GPUDynInstPtr gpuDynInst)
1546 {
1547 ScalarRegI32 simm16 = (ScalarRegI32)instData.SIMM16;
1548 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1549 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1550
1551 src.read();
1552
1553 scc = (src.rawData() <= simm16) ? 1 : 0;
1554
1555 scc.write();
1556 }
1557
1558 Inst_SOPK__S_CMPK_EQ_U32::Inst_SOPK__S_CMPK_EQ_U32(InFmt_SOPK *iFmt)
1559 : Inst_SOPK(iFmt, "s_cmpk_eq_u32")
1560 {
1561 setFlag(ALU);
1562 } // Inst_SOPK__S_CMPK_EQ_U32
1563
1564 Inst_SOPK__S_CMPK_EQ_U32::~Inst_SOPK__S_CMPK_EQ_U32()
1565 {
1566 } // ~Inst_SOPK__S_CMPK_EQ_U32
1567
1568 // SCC = (S0.u == SIMM16).
1569 void
1570 Inst_SOPK__S_CMPK_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
1571 {
1572 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1573 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1574 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1575
1576 src.read();
1577
1578 scc = (src.rawData() == simm16) ? 1 : 0;
1579
1580 scc.write();
1581 }
1582
1583 Inst_SOPK__S_CMPK_LG_U32::Inst_SOPK__S_CMPK_LG_U32(InFmt_SOPK *iFmt)
1584 : Inst_SOPK(iFmt, "s_cmpk_lg_u32")
1585 {
1586 setFlag(ALU);
1587 } // Inst_SOPK__S_CMPK_LG_U32
1588
1589 Inst_SOPK__S_CMPK_LG_U32::~Inst_SOPK__S_CMPK_LG_U32()
1590 {
1591 } // ~Inst_SOPK__S_CMPK_LG_U32
1592
1593 // SCC = (S0.u != SIMM16).
1594 void
1595 Inst_SOPK__S_CMPK_LG_U32::execute(GPUDynInstPtr gpuDynInst)
1596 {
1597 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1598 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1599 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1600
1601 src.read();
1602
1603 scc = (src.rawData() != simm16) ? 1 : 0;
1604
1605 scc.write();
1606 }
1607
1608 Inst_SOPK__S_CMPK_GT_U32::Inst_SOPK__S_CMPK_GT_U32(InFmt_SOPK *iFmt)
1609 : Inst_SOPK(iFmt, "s_cmpk_gt_u32")
1610 {
1611 setFlag(ALU);
1612 } // Inst_SOPK__S_CMPK_GT_U32
1613
1614 Inst_SOPK__S_CMPK_GT_U32::~Inst_SOPK__S_CMPK_GT_U32()
1615 {
1616 } // ~Inst_SOPK__S_CMPK_GT_U32
1617
1618 // SCC = (S0.u > SIMM16).
1619 void
1620 Inst_SOPK__S_CMPK_GT_U32::execute(GPUDynInstPtr gpuDynInst)
1621 {
1622 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1623 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1624 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1625
1626 src.read();
1627
1628 scc = (src.rawData() > simm16) ? 1 : 0;
1629
1630 scc.write();
1631 }
1632
1633 Inst_SOPK__S_CMPK_GE_U32::Inst_SOPK__S_CMPK_GE_U32(InFmt_SOPK *iFmt)
1634 : Inst_SOPK(iFmt, "s_cmpk_ge_u32")
1635 {
1636 setFlag(ALU);
1637 } // Inst_SOPK__S_CMPK_GE_U32
1638
1639 Inst_SOPK__S_CMPK_GE_U32::~Inst_SOPK__S_CMPK_GE_U32()
1640 {
1641 } // ~Inst_SOPK__S_CMPK_GE_U32
1642
1643 // SCC = (S0.u >= SIMM16).
1644 void
1645 Inst_SOPK__S_CMPK_GE_U32::execute(GPUDynInstPtr gpuDynInst)
1646 {
1647 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1648 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1649 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1650
1651 src.read();
1652
1653 scc = (src.rawData() >= simm16) ? 1 : 0;
1654
1655 scc.write();
1656 }
1657
1658 Inst_SOPK__S_CMPK_LT_U32::Inst_SOPK__S_CMPK_LT_U32(InFmt_SOPK *iFmt)
1659 : Inst_SOPK(iFmt, "s_cmpk_lt_u32")
1660 {
1661 setFlag(ALU);
1662 } // Inst_SOPK__S_CMPK_LT_U32
1663
1664 Inst_SOPK__S_CMPK_LT_U32::~Inst_SOPK__S_CMPK_LT_U32()
1665 {
1666 } // ~Inst_SOPK__S_CMPK_LT_U32
1667
1668 // SCC = (S0.u < SIMM16).
1669 void
1670 Inst_SOPK__S_CMPK_LT_U32::execute(GPUDynInstPtr gpuDynInst)
1671 {
1672 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1673 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1674 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1675
1676 src.read();
1677
1678 scc = (src.rawData() < simm16) ? 1 : 0;
1679
1680 scc.write();
1681 }
1682
1683 Inst_SOPK__S_CMPK_LE_U32::Inst_SOPK__S_CMPK_LE_U32(InFmt_SOPK *iFmt)
1684 : Inst_SOPK(iFmt, "s_cmpk_le_u32")
1685 {
1686 setFlag(ALU);
1687 } // Inst_SOPK__S_CMPK_LE_U32
1688
1689 Inst_SOPK__S_CMPK_LE_U32::~Inst_SOPK__S_CMPK_LE_U32()
1690 {
1691 } // ~Inst_SOPK__S_CMPK_LE_U32
1692
1693 // SCC = (S0.u <= SIMM16).
1694 void
1695 Inst_SOPK__S_CMPK_LE_U32::execute(GPUDynInstPtr gpuDynInst)
1696 {
1697 ScalarRegU32 simm16 = (ScalarRegU32)instData.SIMM16;
1698 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
1699 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1700
1701 src.read();
1702
1703 scc = (src.rawData() <= simm16) ? 1 : 0;
1704
1705 scc.write();
1706 }
1707
1708 Inst_SOPK__S_ADDK_I32::Inst_SOPK__S_ADDK_I32(InFmt_SOPK *iFmt)
1709 : Inst_SOPK(iFmt, "s_addk_i32")
1710 {
1711 setFlag(ALU);
1712 } // Inst_SOPK__S_ADDK_I32
1713
1714 Inst_SOPK__S_ADDK_I32::~Inst_SOPK__S_ADDK_I32()
1715 {
1716 } // ~Inst_SOPK__S_ADDK_I32
1717
1718 // D.i = D.i + signext(SIMM16);
1719 // SCC = overflow.
1720 void
1721 Inst_SOPK__S_ADDK_I32::execute(GPUDynInstPtr gpuDynInst)
1722 {
1723 ScalarRegI16 simm16 = instData.SIMM16;
1724 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
1725 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1726 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1727
1728 src.read();
1729
1730 sdst = src.rawData() + (ScalarRegI32)simm16;
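// Signed overflow: the addends agree in sign but the sum differs, e.g.,
// 0x7fffffff + 1 sets SCC = 1.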
1731 scc = (bits(src.rawData(), 31) == bits(simm16, 15)
1732 && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
1733
1734 sdst.write();
1735 scc.write();
1736 }
1737
1738 Inst_SOPK__S_MULK_I32::Inst_SOPK__S_MULK_I32(InFmt_SOPK *iFmt)
1739 : Inst_SOPK(iFmt, "s_mulk_i32")
1740 {
1741 setFlag(ALU);
1742 } // Inst_SOPK__S_MULK_I32
1743
1744 Inst_SOPK__S_MULK_I32::~Inst_SOPK__S_MULK_I32()
1745 {
1746 } // ~Inst_SOPK__S_MULK_I32
1747
1748 // D.i = D.i * signext(SIMM16).
1749 void
1750 Inst_SOPK__S_MULK_I32::execute(GPUDynInstPtr gpuDynInst)
1751 {
1752 ScalarRegI16 simm16 = instData.SIMM16;
1753 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
1754
1755 sdst.read();
1756
1757 sdst = sdst.rawData() * (ScalarRegI32)simm16;
1758
1759 sdst.write();
1760 }
1761
1762 Inst_SOPK__S_CBRANCH_I_FORK::Inst_SOPK__S_CBRANCH_I_FORK(InFmt_SOPK *iFmt)
1763 : Inst_SOPK(iFmt, "s_cbranch_i_fork")
1764 {
1765 setFlag(Branch);
1766 } // Inst_SOPK__S_CBRANCH_I_FORK
1767
1768 Inst_SOPK__S_CBRANCH_I_FORK::~Inst_SOPK__S_CBRANCH_I_FORK()
1769 {
1770 } // ~Inst_SOPK__S_CBRANCH_I_FORK
1771
1772 // Conditional branch using branch-stack.
1773     // S0 = compare mask (vcc or any sgpr), and
1774 // SIMM16 = signed DWORD branch offset relative to next instruction.
1775 void
1776 Inst_SOPK__S_CBRANCH_I_FORK::execute(GPUDynInstPtr gpuDynInst)
1777 {
1778 panicUnimplemented();
1779 }
1780
1781 Inst_SOPK__S_GETREG_B32::Inst_SOPK__S_GETREG_B32(InFmt_SOPK *iFmt)
1782 : Inst_SOPK(iFmt, "s_getreg_b32")
1783 {
1784 } // Inst_SOPK__S_GETREG_B32
1785
1786 Inst_SOPK__S_GETREG_B32::~Inst_SOPK__S_GETREG_B32()
1787 {
1788 } // ~Inst_SOPK__S_GETREG_B32
1789
1790 // D.u = hardware-reg. Read some or all of a hardware register into the
1791 // LSBs of D.
1792 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
1793 // is 1..32.
1794 void
1795 Inst_SOPK__S_GETREG_B32::execute(GPUDynInstPtr gpuDynInst)
1796 {
1797 panicUnimplemented();
1798 }
1799
1800 Inst_SOPK__S_SETREG_B32::Inst_SOPK__S_SETREG_B32(InFmt_SOPK *iFmt)
1801 : Inst_SOPK(iFmt, "s_setreg_b32")
1802 {
1803 setFlag(ALU);
1804 } // Inst_SOPK__S_SETREG_B32
1805
1806 Inst_SOPK__S_SETREG_B32::~Inst_SOPK__S_SETREG_B32()
1807 {
1808 } // ~Inst_SOPK__S_SETREG_B32
1809
1810 // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
1811 // register.
1812 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
1813 // is 1..32.
1814 void
1815 Inst_SOPK__S_SETREG_B32::execute(GPUDynInstPtr gpuDynInst)
1816 {
1817 ScalarRegI16 simm16 = instData.SIMM16;
1818 ScalarRegU32 hwregId = simm16 & 0x3f;
1819 ScalarRegU32 offset = (simm16 >> 6) & 31;
1820 ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
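// e.g., SIMM16 = 0x0801 decodes to hwregId = 1 (MODE), offset = 0,
// size = 2, i.e., the single-precision FP round-mode bits.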
1821
1822 ScalarOperandU32 hwreg(gpuDynInst, hwregId);
1823 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1824 hwreg.read();
1825 sdst.read();
1826
1827 // Store value from SDST to part of the hardware register.
1828 ScalarRegU32 mask = (((1U << size) - 1U) << offset);
1829 hwreg = ((hwreg.rawData() & ~mask)
1830 | ((sdst.rawData() << offset) & mask));
1831 hwreg.write();
1832
1833         // Writes to the MODE register select single-precision
1834         // floating-point behavior (denormal mode or round mode),
         // which gem5 does not model.
1835         if (hwregId == 1 && size == 2
1836             && (offset == 4 || offset == 0)) {
1837             warn_once("s_setreg_b32 has no real effect on FP modes "
1838                 "in gem5: %s\n", gpuDynInst->disassemble());
1839 return;
1840 }
1841
1842 // panic if not changing MODE of floating-point numbers
1843 panicUnimplemented();
1844 }
1845
1846 Inst_SOPK__S_SETREG_IMM32_B32::Inst_SOPK__S_SETREG_IMM32_B32(
1847 InFmt_SOPK *iFmt)
1848 : Inst_SOPK(iFmt, "s_setreg_imm32_b32")
1849 {
1850 } // Inst_SOPK__S_SETREG_IMM32_B32
1851
1852 Inst_SOPK__S_SETREG_IMM32_B32::~Inst_SOPK__S_SETREG_IMM32_B32()
1853 {
1854 } // ~Inst_SOPK__S_SETREG_IMM32_B32
1855
1856 // Write some or all of the LSBs of IMM32 into a hardware register; this
1857 // instruction requires a 32-bit literal constant.
1858 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
1859 // is 1..32.
1860 void
1861 Inst_SOPK__S_SETREG_IMM32_B32::execute(GPUDynInstPtr gpuDynInst)
1862 {
1863 panicUnimplemented();
1864 }
1865
1866 Inst_SOP1__S_MOV_B32::Inst_SOP1__S_MOV_B32(InFmt_SOP1 *iFmt)
1867 : Inst_SOP1(iFmt, "s_mov_b32")
1868 {
1869 setFlag(ALU);
1870 } // Inst_SOP1__S_MOV_B32
1871
1872 Inst_SOP1__S_MOV_B32::~Inst_SOP1__S_MOV_B32()
1873 {
1874 } // ~Inst_SOP1__S_MOV_B32
1875
1876 // D.u = S0.u.
1877 void
1878 Inst_SOP1__S_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
1879 {
1880 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
1881 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1882
1883 src.read();
1884
1885 sdst = src.rawData();
1886
1887 sdst.write();
1888 }
1889
1890 Inst_SOP1__S_MOV_B64::Inst_SOP1__S_MOV_B64(InFmt_SOP1 *iFmt)
1891 : Inst_SOP1(iFmt, "s_mov_b64")
1892 {
1893 setFlag(ALU);
1894 } // Inst_SOP1__S_MOV_B64
1895
1896 Inst_SOP1__S_MOV_B64::~Inst_SOP1__S_MOV_B64()
1897 {
1898 } // ~Inst_SOP1__S_MOV_B64
1899
1900 // D.u64 = S0.u64.
1901 void
1902 Inst_SOP1__S_MOV_B64::execute(GPUDynInstPtr gpuDynInst)
1903 {
1904 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
1905 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1906
1907 src.read();
1908
1909 sdst = src.rawData();
1910
1911 sdst.write();
1912 }
1913
1914 Inst_SOP1__S_CMOV_B32::Inst_SOP1__S_CMOV_B32(InFmt_SOP1 *iFmt)
1915 : Inst_SOP1(iFmt, "s_cmov_b32")
1916 {
1917 setFlag(ALU);
1918 } // Inst_SOP1__S_CMOV_B32
1919
1920 Inst_SOP1__S_CMOV_B32::~Inst_SOP1__S_CMOV_B32()
1921 {
1922 } // ~Inst_SOP1__S_CMOV_B32
1923
1924 // if (SCC) then D.u = S0.u;
1925 // else NOP.
1926 // Conditional move.
1927 void
1928 Inst_SOP1__S_CMOV_B32::execute(GPUDynInstPtr gpuDynInst)
1929 {
1930 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
1931 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1932 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1933
1934 src.read();
1935 scc.read();
1936
1937 if (scc.rawData()) {
1938 sdst = src.rawData();
1939 sdst.write();
1940 }
1941 }
1942
1943 Inst_SOP1__S_CMOV_B64::Inst_SOP1__S_CMOV_B64(InFmt_SOP1 *iFmt)
1944 : Inst_SOP1(iFmt, "s_cmov_b64")
1945 {
1946 setFlag(ALU);
1947 } // Inst_SOP1__S_CMOV_B64
1948
1949 Inst_SOP1__S_CMOV_B64::~Inst_SOP1__S_CMOV_B64()
1950 {
1951 } // ~Inst_SOP1__S_CMOV_B64
1952
1953 // if (SCC) then D.u64 = S0.u64;
1954 // else NOP.
1955 // Conditional move.
1956 void
1957 Inst_SOP1__S_CMOV_B64::execute(GPUDynInstPtr gpuDynInst)
1958 {
1959 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
1960 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
1961 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1962
1963 src.read();
1964 scc.read();
1965
1966 if (scc.rawData()) {
1967 sdst = src.rawData();
1968 sdst.write();
1969 }
1970 }
1971
1972 Inst_SOP1__S_NOT_B32::Inst_SOP1__S_NOT_B32(InFmt_SOP1 *iFmt)
1973 : Inst_SOP1(iFmt, "s_not_b32")
1974 {
1975 setFlag(ALU);
1976 } // Inst_SOP1__S_NOT_B32
1977
1978 Inst_SOP1__S_NOT_B32::~Inst_SOP1__S_NOT_B32()
1979 {
1980 } // ~Inst_SOP1__S_NOT_B32
1981
1982 // D.u = ~S0.u;
1983 // SCC = 1 if result is non-zero.
1984 // Bitwise negation.
1985 void
1986 Inst_SOP1__S_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
1987 {
1988 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
1989 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
1990 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
1991
1992 src.read();
1993
1994 sdst = ~src.rawData();
1995
1996 scc = sdst.rawData() ? 1 : 0;
1997
1998 sdst.write();
1999 scc.write();
2000 }
2001
2002 Inst_SOP1__S_NOT_B64::Inst_SOP1__S_NOT_B64(InFmt_SOP1 *iFmt)
2003 : Inst_SOP1(iFmt, "s_not_b64")
2004 {
2005 setFlag(ALU);
2006 } // Inst_SOP1__S_NOT_B64
2007
2008 Inst_SOP1__S_NOT_B64::~Inst_SOP1__S_NOT_B64()
2009 {
2010 } // ~Inst_SOP1__S_NOT_B64
2011
2012 // D.u64 = ~S0.u64;
2013 // SCC = 1 if result is non-zero.
2014 // Bitwise negation.
2015 void
2016 Inst_SOP1__S_NOT_B64::execute(GPUDynInstPtr gpuDynInst)
2017 {
2018 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2019 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2020 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2021
2022 src.read();
2023
2024 sdst = ~src.rawData();
2025 scc = sdst.rawData() ? 1 : 0;
2026
2027 sdst.write();
2028 scc.write();
2029 }
2030
2031 Inst_SOP1__S_WQM_B32::Inst_SOP1__S_WQM_B32(InFmt_SOP1 *iFmt)
2032 : Inst_SOP1(iFmt, "s_wqm_b32")
2033 {
2034 setFlag(ALU);
2035 } // Inst_SOP1__S_WQM_B32
2036
2037 Inst_SOP1__S_WQM_B32::~Inst_SOP1__S_WQM_B32()
2038 {
2039 } // ~Inst_SOP1__S_WQM_B32
2040
2041 // Computes whole quad mode for an active/valid mask.
2042 // SCC = 1 if result is non-zero.
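// Example (assuming wholeQuadMode() ORs each 4-bit group of the mask
// and broadcasts the result to all four bits of that group):
// wholeQuadMode(0x00210001) == 0x00ff000f.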
2043 void
2044 Inst_SOP1__S_WQM_B32::execute(GPUDynInstPtr gpuDynInst)
2045 {
2046 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2047 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2048 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2049
2050 src.read();
2051
2052 sdst = wholeQuadMode(src.rawData());
2053 scc = sdst.rawData() ? 1 : 0;
2054
2055 sdst.write();
2056 scc.write();
2057 }
2058
2059 Inst_SOP1__S_WQM_B64::Inst_SOP1__S_WQM_B64(InFmt_SOP1 *iFmt)
2060 : Inst_SOP1(iFmt, "s_wqm_b64")
2061 {
2062 setFlag(ALU);
2063 } // Inst_SOP1__S_WQM_B64
2064
2065 Inst_SOP1__S_WQM_B64::~Inst_SOP1__S_WQM_B64()
2066 {
2067 } // ~Inst_SOP1__S_WQM_B64
2068
2069 // Computes whole quad mode for an active/valid mask.
2070 // SCC = 1 if result is non-zero.
2071 void
2072 Inst_SOP1__S_WQM_B64::execute(GPUDynInstPtr gpuDynInst)
2073 {
2074 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2075 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2076 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2077
2078 src.read();
2079
2080 sdst = wholeQuadMode(src.rawData());
2081 scc = sdst.rawData() ? 1 : 0;
2082
2083 sdst.write();
2084 scc.write();
2085 }
2086
2087 Inst_SOP1__S_BREV_B32::Inst_SOP1__S_BREV_B32(InFmt_SOP1 *iFmt)
2088 : Inst_SOP1(iFmt, "s_brev_b32")
2089 {
2090 setFlag(ALU);
2091 } // Inst_SOP1__S_BREV_B32
2092
2093 Inst_SOP1__S_BREV_B32::~Inst_SOP1__S_BREV_B32()
2094 {
2095 } // ~Inst_SOP1__S_BREV_B32
2096
2097 // D.u[31:0] = S0.u[0:31] (reverse bits).
2098 void
2099 Inst_SOP1__S_BREV_B32::execute(GPUDynInstPtr gpuDynInst)
2100 {
2101 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2102 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2103
2104 src.read();
2105
2106 sdst = reverseBits(src.rawData());
2107
2108 sdst.write();
2109 }
2110
2111 Inst_SOP1__S_BREV_B64::Inst_SOP1__S_BREV_B64(InFmt_SOP1 *iFmt)
2112 : Inst_SOP1(iFmt, "s_brev_b64")
2113 {
2114 setFlag(ALU);
2115 } // Inst_SOP1__S_BREV_B64
2116
2117 Inst_SOP1__S_BREV_B64::~Inst_SOP1__S_BREV_B64()
2118 {
2119 } // ~Inst_SOP1__S_BREV_B64
2120
2121 // D.u64[63:0] = S0.u64[0:63] (reverse bits).
2122 void
2123 Inst_SOP1__S_BREV_B64::execute(GPUDynInstPtr gpuDynInst)
2124 {
2125 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2126 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2127
2128 src.read();
2129
2130 sdst = reverseBits(src.rawData());
2131
2132 sdst.write();
2133 }
2134
2135 Inst_SOP1__S_BCNT0_I32_B32::Inst_SOP1__S_BCNT0_I32_B32(InFmt_SOP1 *iFmt)
2136 : Inst_SOP1(iFmt, "s_bcnt0_i32_b32")
2137 {
2138 setFlag(ALU);
2139 } // Inst_SOP1__S_BCNT0_I32_B32
2140
2141 Inst_SOP1__S_BCNT0_I32_B32::~Inst_SOP1__S_BCNT0_I32_B32()
2142 {
2143 } // ~Inst_SOP1__S_BCNT0_I32_B32
2144
2145 // D.i = CountZeroBits(S0.u);
2146 // SCC = 1 if result is non-zero.
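// Example (assuming countZeroBits() matches the spec above):
// countZeroBits(0x00000000) == 32 (SCC = 1);
// countZeroBits(0xffffffff) == 0 (SCC = 0).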
2147 void
2148 Inst_SOP1__S_BCNT0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2149 {
2150 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2151 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2152 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2153
2154 src.read();
2155
2156 sdst = countZeroBits(src.rawData());
2157 scc = sdst.rawData() ? 1 : 0;
2158
2159 sdst.write();
2160 scc.write();
2161 }
2162
2163 Inst_SOP1__S_BCNT0_I32_B64::Inst_SOP1__S_BCNT0_I32_B64(InFmt_SOP1 *iFmt)
2164 : Inst_SOP1(iFmt, "s_bcnt0_i32_b64")
2165 {
2166 setFlag(ALU);
2167 } // Inst_SOP1__S_BCNT0_I32_B64
2168
2169 Inst_SOP1__S_BCNT0_I32_B64::~Inst_SOP1__S_BCNT0_I32_B64()
2170 {
2171 } // ~Inst_SOP1__S_BCNT0_I32_B64
2172
2173 // D.i = CountZeroBits(S0.u64);
2174 // SCC = 1 if result is non-zero.
2175 void
2176 Inst_SOP1__S_BCNT0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2177 {
2178 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2179 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2180 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2181
2182 src.read();
2183
2184 sdst = countZeroBits(src.rawData());
2185 scc = sdst.rawData() ? 1 : 0;
2186
2187 sdst.write();
2188 scc.write();
2189 }
2190
2191 Inst_SOP1__S_BCNT1_I32_B32::Inst_SOP1__S_BCNT1_I32_B32(InFmt_SOP1 *iFmt)
2192 : Inst_SOP1(iFmt, "s_bcnt1_i32_b32")
2193 {
2194 setFlag(ALU);
2195 } // Inst_SOP1__S_BCNT1_I32_B32
2196
2197 Inst_SOP1__S_BCNT1_I32_B32::~Inst_SOP1__S_BCNT1_I32_B32()
2198 {
2199 } // ~Inst_SOP1__S_BCNT1_I32_B32
2200
2201 // D.i = CountOneBits(S0.u);
2202 // SCC = 1 if result is non-zero.
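// Example (assuming popCount() matches the spec above):
// popCount(0xcccccccc) == 16; popCount(0x00000000) == 0 (SCC = 0).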
2203 void
2204 Inst_SOP1__S_BCNT1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2205 {
2206 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2207 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2208 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2209
2210 src.read();
2211
2212 sdst = popCount(src.rawData());
2213 scc = sdst.rawData() ? 1 : 0;
2214
2215 sdst.write();
2216 scc.write();
2217 }
2218
2219 Inst_SOP1__S_BCNT1_I32_B64::Inst_SOP1__S_BCNT1_I32_B64(InFmt_SOP1 *iFmt)
2220 : Inst_SOP1(iFmt, "s_bcnt1_i32_b64")
2221 {
2222 setFlag(ALU);
2223 } // Inst_SOP1__S_BCNT1_I32_B64
2224
2225 Inst_SOP1__S_BCNT1_I32_B64::~Inst_SOP1__S_BCNT1_I32_B64()
2226 {
2227 } // ~Inst_SOP1__S_BCNT1_I32_B64
2228
2229 // D.i = CountOneBits(S0.u64);
2230 // SCC = 1 if result is non-zero.
2231 void
2232 Inst_SOP1__S_BCNT1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2233 {
2234 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2235 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2236 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2237
2238 src.read();
2239
2240 sdst = popCount(src.rawData());
2241 scc = sdst.rawData() ? 1 : 0;
2242
2243 sdst.write();
2244 scc.write();
2245 }
2246
2247 Inst_SOP1__S_FF0_I32_B32::Inst_SOP1__S_FF0_I32_B32(InFmt_SOP1 *iFmt)
2248 : Inst_SOP1(iFmt, "s_ff0_i32_b32")
2249 {
2250 setFlag(ALU);
2251 } // Inst_SOP1__S_FF0_I32_B32
2252
2253 Inst_SOP1__S_FF0_I32_B32::~Inst_SOP1__S_FF0_I32_B32()
2254 {
2255 } // ~Inst_SOP1__S_FF0_I32_B32
2256
2257 // D.i = FindFirstZero(S0.u);
2258 // If no zeros are found, return -1.
2259 // Returns the bit position of the first zero from the LSB.
2260 void
2261 Inst_SOP1__S_FF0_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2262 {
2263 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2264 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2265
2266 src.read();
2267
2268 sdst = findFirstZero(src.rawData());
2269
2270 sdst.write();
2271 }
2272
2273 Inst_SOP1__S_FF0_I32_B64::Inst_SOP1__S_FF0_I32_B64(InFmt_SOP1 *iFmt)
2274 : Inst_SOP1(iFmt, "s_ff0_i32_b64")
2275 {
2276 setFlag(ALU);
2277 } // Inst_SOP1__S_FF0_I32_B64
2278
2279 Inst_SOP1__S_FF0_I32_B64::~Inst_SOP1__S_FF0_I32_B64()
2280 {
2281 } // ~Inst_SOP1__S_FF0_I32_B64
2282
2283 // D.i = FindFirstZero(S0.u64);
2284 // If no zeros are found, return -1.
2285 // Returns the bit position of the first zero from the LSB.
2286 void
2287 Inst_SOP1__S_FF0_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2288 {
2289 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2290 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2291
2292 src.read();
2293
2294 sdst = findFirstZero(src.rawData());
2295
2296 sdst.write();
2297 }
2298
2299 Inst_SOP1__S_FF1_I32_B32::Inst_SOP1__S_FF1_I32_B32(InFmt_SOP1 *iFmt)
2300 : Inst_SOP1(iFmt, "s_ff1_i32_b32")
2301 {
2302 setFlag(ALU);
2303 } // Inst_SOP1__S_FF1_I32_B32
2304
2305 Inst_SOP1__S_FF1_I32_B32::~Inst_SOP1__S_FF1_I32_B32()
2306 {
2307 } // ~Inst_SOP1__S_FF1_I32_B32
2308
2309 // D.i = FindFirstOne(S0.u);
2310 // If no ones are found, return -1.
2311 // Returns the bit position of the first one from the LSB.
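// Example (assuming findFirstOne() matches the spec above):
// findFirstOne(0x00010000) == 16; findFirstOne(0x00000000) == -1.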
2312 void
2313 Inst_SOP1__S_FF1_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2314 {
2315 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2316 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2317
2318 src.read();
2319
2320 sdst = findFirstOne(src.rawData());
2321
2322 sdst.write();
2323 }
2324
2325 Inst_SOP1__S_FF1_I32_B64::Inst_SOP1__S_FF1_I32_B64(InFmt_SOP1 *iFmt)
2326 : Inst_SOP1(iFmt, "s_ff1_i32_b64")
2327 {
2328 setFlag(ALU);
2329 } // Inst_SOP1__S_FF1_I32_B64
2330
2331 Inst_SOP1__S_FF1_I32_B64::~Inst_SOP1__S_FF1_I32_B64()
2332 {
2333 } // ~Inst_SOP1__S_FF1_I32_B64
2334
2335 // D.i = FindFirstOne(S0.u64);
2336 // If no ones are found, return -1.
2337 // Returns the bit position of the first one from the LSB.
2338 void
2339 Inst_SOP1__S_FF1_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2340 {
2341 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2342 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2343
2344 src.read();
2345
2346 sdst = findFirstOne(src.rawData());
2347
2348 sdst.write();
2349 }
2350
2351 Inst_SOP1__S_FLBIT_I32_B32::Inst_SOP1__S_FLBIT_I32_B32(InFmt_SOP1 *iFmt)
2352 : Inst_SOP1(iFmt, "s_flbit_i32_b32")
2353 {
2354 setFlag(ALU);
2355 } // Inst_SOP1__S_FLBIT_I32_B32
2356
2357 Inst_SOP1__S_FLBIT_I32_B32::~Inst_SOP1__S_FLBIT_I32_B32()
2358 {
2359 } // ~Inst_SOP1__S_FLBIT_I32_B32
2360
2361 // D.i = FindFirstOne(S0.u);
2362 // If no ones are found, return -1.
2363 // Counts how many zeros before the first one starting from the MSB.
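// Example (assuming countZeroBitsMsb() matches the spec above):
// countZeroBitsMsb(0x0000ffff) == 16; countZeroBitsMsb(0) == -1.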
2364 void
2365 Inst_SOP1__S_FLBIT_I32_B32::execute(GPUDynInstPtr gpuDynInst)
2366 {
2367 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2368 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2369
2370 src.read();
2371
2372 sdst = countZeroBitsMsb(src.rawData());
2373
2374 sdst.write();
2375 }
2376
2377 Inst_SOP1__S_FLBIT_I32_B64::Inst_SOP1__S_FLBIT_I32_B64(InFmt_SOP1 *iFmt)
2378 : Inst_SOP1(iFmt, "s_flbit_i32_b64")
2379 {
2380 setFlag(ALU);
2381 } // Inst_SOP1__S_FLBIT_I32_B64
2382
2383 Inst_SOP1__S_FLBIT_I32_B64::~Inst_SOP1__S_FLBIT_I32_B64()
2384 {
2385 } // ~Inst_SOP1__S_FLBIT_I32_B64
2386
2387 // D.i = FindFirstOne(S0.u64);
2388 // If no ones are found, return -1.
2389 // Counts how many zeros before the first one starting from the MSB.
2390 void
2391 Inst_SOP1__S_FLBIT_I32_B64::execute(GPUDynInstPtr gpuDynInst)
2392 {
2393 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2394 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2395
2396 src.read();
2397
2398 sdst = countZeroBitsMsb(src.rawData());
2399
2400 sdst.write();
2401 }
2402
2403 Inst_SOP1__S_FLBIT_I32::Inst_SOP1__S_FLBIT_I32(InFmt_SOP1 *iFmt)
2404 : Inst_SOP1(iFmt, "s_flbit_i32")
2405 {
2406 setFlag(ALU);
2407 } // Inst_SOP1__S_FLBIT_I32
2408
2409 Inst_SOP1__S_FLBIT_I32::~Inst_SOP1__S_FLBIT_I32()
2410 {
2411 } // ~Inst_SOP1__S_FLBIT_I32
2412
2413 // D.i = FirstOppositeSignBit(S0.i);
2414 // If S0.i == 0 or S0.i == -1 (all bits are the same), return -1.
2415 // Counts how many bits in a row (from MSB to LSB) are the same as the
2416 // sign bit.
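// Example (assuming firstOppositeSignBit() matches the spec above):
// 0x0000ffff has 16 leading copies of its (zero) sign bit, so the
// result is 16; 0xffff0000 (16 leading ones) also yields 16.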
2417 void
2418 Inst_SOP1__S_FLBIT_I32::execute(GPUDynInstPtr gpuDynInst)
2419 {
2420 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
2421 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2422
2423 src.read();
2424
2425 sdst = firstOppositeSignBit(src.rawData());
2426
2427 sdst.write();
2428 }
2429
2430 Inst_SOP1__S_FLBIT_I32_I64::Inst_SOP1__S_FLBIT_I32_I64(InFmt_SOP1 *iFmt)
2431 : Inst_SOP1(iFmt, "s_flbit_i32_i64")
2432 {
2433 setFlag(ALU);
2434 } // Inst_SOP1__S_FLBIT_I32_I64
2435
2436 Inst_SOP1__S_FLBIT_I32_I64::~Inst_SOP1__S_FLBIT_I32_I64()
2437 {
2438 } // ~Inst_SOP1__S_FLBIT_I32_I64
2439
2440 // D.i = FirstOppositeSignBit(S0.i64);
2441 // If S0.i64 == 0 or S0.i64 == -1 (all bits are the same), return -1.
2442 // Counts how many bits in a row (from MSB to LSB) are the same as the
2443 // sign bit.
2444 void
2445 Inst_SOP1__S_FLBIT_I32_I64::execute(GPUDynInstPtr gpuDynInst)
2446 {
2447 ConstScalarOperandI64 src(gpuDynInst, instData.SSRC0);
2448 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2449
2450 src.read();
2451
2452 sdst = firstOppositeSignBit(src.rawData());
2453
2454 sdst.write();
2455 }
2456
2457 Inst_SOP1__S_SEXT_I32_I8::Inst_SOP1__S_SEXT_I32_I8(InFmt_SOP1 *iFmt)
2458 : Inst_SOP1(iFmt, "s_sext_i32_i8")
2459 {
2460 setFlag(ALU);
2461 } // Inst_SOP1__S_SEXT_I32_I8
2462
2463 Inst_SOP1__S_SEXT_I32_I8::~Inst_SOP1__S_SEXT_I32_I8()
2464 {
2465 } // ~Inst_SOP1__S_SEXT_I32_I8
2466
2467 // D.i = signext(S0.i[7:0]) (sign extension).
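// For example, signext(0xff) == 0xffffffff (-1) and
// signext(0x7f) == 0x0000007f (+127).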
2468 void
2469 Inst_SOP1__S_SEXT_I32_I8::execute(GPUDynInstPtr gpuDynInst)
2470 {
2471 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
2472 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2473
2474 src.read();
2475
2476 sdst = sext<std::numeric_limits<ScalarRegI8>::digits + 1>(
2477 bits(src.rawData(), 7, 0));
2478
2479 sdst.write();
2480 }
2481
2482 Inst_SOP1__S_SEXT_I32_I16::Inst_SOP1__S_SEXT_I32_I16(InFmt_SOP1 *iFmt)
2483 : Inst_SOP1(iFmt, "s_sext_i32_i16")
2484 {
2485 setFlag(ALU);
2486 } // Inst_SOP1__S_SEXT_I32_I16
2487
2488 Inst_SOP1__S_SEXT_I32_I16::~Inst_SOP1__S_SEXT_I32_I16()
2489 {
2490 } // ~Inst_SOP1__S_SEXT_I32_I16
2491
2492 // D.i = signext(S0.i[15:0]) (sign extension).
2493 void
2494 Inst_SOP1__S_SEXT_I32_I16::execute(GPUDynInstPtr gpuDynInst)
2495 {
2496 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
2497 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
2498
2499 src.read();
2500
2501 sdst = sext<std::numeric_limits<ScalarRegI16>::digits + 1>(
2502 bits(src.rawData(), 15, 0));
2503
2504 sdst.write();
2505 }
2506
2507 Inst_SOP1__S_BITSET0_B32::Inst_SOP1__S_BITSET0_B32(InFmt_SOP1 *iFmt)
2508 : Inst_SOP1(iFmt, "s_bitset0_b32")
2509 {
2510 setFlag(ALU);
2511 } // Inst_SOP1__S_BITSET0_B32
2512
2513 Inst_SOP1__S_BITSET0_B32::~Inst_SOP1__S_BITSET0_B32()
2514 {
2515 } // ~Inst_SOP1__S_BITSET0_B32
2516
2517 // D.u[S0.u[4:0]] = 0.
2518 void
2519 Inst_SOP1__S_BITSET0_B32::execute(GPUDynInstPtr gpuDynInst)
2520 {
2521 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2522 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2523
2524 src.read();
2525
2526 sdst.setBit(bits(src.rawData(), 4, 0), 0);
2527
2528 sdst.write();
2529 }
2530
2531 Inst_SOP1__S_BITSET0_B64::Inst_SOP1__S_BITSET0_B64(InFmt_SOP1 *iFmt)
2532 : Inst_SOP1(iFmt, "s_bitset0_b64")
2533 {
2534 setFlag(ALU);
2535 } // Inst_SOP1__S_BITSET0_B64
2536
2537 Inst_SOP1__S_BITSET0_B64::~Inst_SOP1__S_BITSET0_B64()
2538 {
2539 } // ~Inst_SOP1__S_BITSET0_B64
2540
2541 // D.u64[S0.u[5:0]] = 0.
2542 void
2543 Inst_SOP1__S_BITSET0_B64::execute(GPUDynInstPtr gpuDynInst)
2544 {
2545 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2546 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2547
2548 src.read();
2549
2550 sdst.setBit(bits(src.rawData(), 5, 0), 0);
2551
2552 sdst.write();
2553 }
2554
2555 Inst_SOP1__S_BITSET1_B32::Inst_SOP1__S_BITSET1_B32(InFmt_SOP1 *iFmt)
2556 : Inst_SOP1(iFmt, "s_bitset1_b32")
2557 {
2558 setFlag(ALU);
2559 } // Inst_SOP1__S_BITSET1_B32
2560
2561 Inst_SOP1__S_BITSET1_B32::~Inst_SOP1__S_BITSET1_B32()
2562 {
2563 } // ~Inst_SOP1__S_BITSET1_B32
2564
2565 // D.u[S0.u[4:0]] = 1.
2566 void
2567 Inst_SOP1__S_BITSET1_B32::execute(GPUDynInstPtr gpuDynInst)
2568 {
2569 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2570 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2571
2572 src.read();
2573
2574 sdst.setBit(bits(src.rawData(), 4, 0), 1);
2575
2576 sdst.write();
2577 }
2578
2579 Inst_SOP1__S_BITSET1_B64::Inst_SOP1__S_BITSET1_B64(InFmt_SOP1 *iFmt)
2580 : Inst_SOP1(iFmt, "s_bitset1_b64")
2581 {
2582 setFlag(ALU);
2583 } // Inst_SOP1__S_BITSET1_B64
2584
2585 Inst_SOP1__S_BITSET1_B64::~Inst_SOP1__S_BITSET1_B64()
2586 {
2587 } // ~Inst_SOP1__S_BITSET1_B64
2588
2589 // D.u64[S0.u[5:0]] = 1.
2590 void
2591 Inst_SOP1__S_BITSET1_B64::execute(GPUDynInstPtr gpuDynInst)
2592 {
2593 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2594 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2595
2596 src.read();
2597
2598 sdst.setBit(bits(src.rawData(), 5, 0), 1);
2599
2600 sdst.write();
2601 }
2602
2603 Inst_SOP1__S_GETPC_B64::Inst_SOP1__S_GETPC_B64(InFmt_SOP1 *iFmt)
2604 : Inst_SOP1(iFmt, "s_getpc_b64")
2605 {
2606 setFlag(ALU);
2607 } // Inst_SOP1__S_GETPC_B64
2608
2609 Inst_SOP1__S_GETPC_B64::~Inst_SOP1__S_GETPC_B64()
2610 {
2611 } // ~Inst_SOP1__S_GETPC_B64
2612
2613 // D.u64 = PC + 4.
2614 // Destination receives the byte address of the next instruction.
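// The SOP1 encoding occupies one dword, so PC + 4 is the address of
// the instruction that follows this one in program order.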
2615 void
2616 Inst_SOP1__S_GETPC_B64::execute(GPUDynInstPtr gpuDynInst)
2617 {
2618 Wavefront *wf = gpuDynInst->wavefront();
2619 Addr pc = wf->pc();
2620 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2621
2622 sdst = pc + 4;
2623
2624 sdst.write();
2625 }
2626
2627 Inst_SOP1__S_SETPC_B64::Inst_SOP1__S_SETPC_B64(InFmt_SOP1 *iFmt)
2628 : Inst_SOP1(iFmt, "s_setpc_b64")
2629 {
2630 setFlag(ALU);
2631 } // Inst_SOP1__S_SETPC_B64
2632
2633 Inst_SOP1__S_SETPC_B64::~Inst_SOP1__S_SETPC_B64()
2634 {
2635 } // ~Inst_SOP1__S_SETPC_B64
2636
2637 // PC = S0.u64.
2638 // S0.u64 is a byte address of the instruction to jump to.
2639 void
2640 Inst_SOP1__S_SETPC_B64::execute(GPUDynInstPtr gpuDynInst)
2641 {
2642 Wavefront *wf = gpuDynInst->wavefront();
2643 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2644
2645 src.read();
2646
2647 wf->pc(src.rawData());
2648 }
2649
2650 Inst_SOP1__S_SWAPPC_B64::Inst_SOP1__S_SWAPPC_B64(InFmt_SOP1 *iFmt)
2651 : Inst_SOP1(iFmt, "s_swappc_b64")
2652 {
2653 setFlag(ALU);
2654 } // Inst_SOP1__S_SWAPPC_B64
2655
2656 Inst_SOP1__S_SWAPPC_B64::~Inst_SOP1__S_SWAPPC_B64()
2657 {
2658 } // ~Inst_SOP1__S_SWAPPC_B64
2659
2660 // D.u64 = PC + 4; PC = S0.u64.
2661 // S0.u64 is a byte address of the instruction to jump to.
2662 void
2663 Inst_SOP1__S_SWAPPC_B64::execute(GPUDynInstPtr gpuDynInst)
2664 {
2665 Wavefront *wf = gpuDynInst->wavefront();
2666 Addr pc = wf->pc();
2667 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2668 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2669
2670 src.read();
2671
2672 sdst = pc + 4;
2673
2674 wf->pc(src.rawData());
2675 sdst.write();
2676 }
2677
2678 Inst_SOP1__S_RFE_B64::Inst_SOP1__S_RFE_B64(InFmt_SOP1 *iFmt)
2679 : Inst_SOP1(iFmt, "s_rfe_b64")
2680 {
2681 } // Inst_SOP1__S_RFE_B64
2682
2683 Inst_SOP1__S_RFE_B64::~Inst_SOP1__S_RFE_B64()
2684 {
2685 } // ~Inst_SOP1__S_RFE_B64
2686
2687 // Return from exception handler and continue.
2688 void
2689 Inst_SOP1__S_RFE_B64::execute(GPUDynInstPtr gpuDynInst)
2690 {
2691 panicUnimplemented();
2692 }
2693
2694 Inst_SOP1__S_AND_SAVEEXEC_B64::Inst_SOP1__S_AND_SAVEEXEC_B64(
2695 InFmt_SOP1 *iFmt)
2696 : Inst_SOP1(iFmt, "s_and_saveexec_b64")
2697 {
2698 setFlag(ALU);
2699 } // Inst_SOP1__S_AND_SAVEEXEC_B64
2700
2701 Inst_SOP1__S_AND_SAVEEXEC_B64::~Inst_SOP1__S_AND_SAVEEXEC_B64()
2702 {
2703 } // ~Inst_SOP1__S_AND_SAVEEXEC_B64
2704
2705 // D.u64 = EXEC;
2706 // EXEC = S0.u64 & EXEC;
2707 // SCC = 1 if the new value of EXEC is non-zero.
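// Commonly used to open a divergent if-block: the prior EXEC mask is
// saved in D while EXEC is narrowed to the lanes taking the branch.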
2708 void
2709 Inst_SOP1__S_AND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2710 {
2711 Wavefront *wf = gpuDynInst->wavefront();
2712 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2713 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2714 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2715
2716 src.read();
2717
2718 sdst = wf->execMask().to_ullong();
2719 wf->execMask() = src.rawData() & wf->execMask().to_ullong();
2720 scc = wf->execMask().any() ? 1 : 0;
2721
2722 sdst.write();
2723 scc.write();
2724 }
2725
2726 Inst_SOP1__S_OR_SAVEEXEC_B64::Inst_SOP1__S_OR_SAVEEXEC_B64(
2727 InFmt_SOP1 *iFmt)
2728 : Inst_SOP1(iFmt, "s_or_saveexec_b64")
2729 {
2730 setFlag(ALU);
2731 } // Inst_SOP1__S_OR_SAVEEXEC_B64
2732
2733 Inst_SOP1__S_OR_SAVEEXEC_B64::~Inst_SOP1__S_OR_SAVEEXEC_B64()
2734 {
2735 } // ~Inst_SOP1__S_OR_SAVEEXEC_B64
2736
2737 // D.u64 = EXEC;
2738 // EXEC = S0.u64 | EXEC;
2739 // SCC = 1 if the new value of EXEC is non-zero.
2740 void
2741 Inst_SOP1__S_OR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2742 {
2743 Wavefront *wf = gpuDynInst->wavefront();
2744 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2745 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2746 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2747
2748 src.read();
2749
2750 sdst = wf->execMask().to_ullong();
2751 wf->execMask() = src.rawData() | wf->execMask().to_ullong();
2752 scc = wf->execMask().any() ? 1 : 0;
2753
2754 sdst.write();
2755 scc.write();
2756 }
2757
2758 Inst_SOP1__S_XOR_SAVEEXEC_B64::Inst_SOP1__S_XOR_SAVEEXEC_B64(
2759 InFmt_SOP1 *iFmt)
2760 : Inst_SOP1(iFmt, "s_xor_saveexec_b64")
2761 {
2762 setFlag(ALU);
2763 } // Inst_SOP1__S_XOR_SAVEEXEC_B64
2764
2765 Inst_SOP1__S_XOR_SAVEEXEC_B64::~Inst_SOP1__S_XOR_SAVEEXEC_B64()
2766 {
2767 } // ~Inst_SOP1__S_XOR_SAVEEXEC_B64
2768
2769 // D.u64 = EXEC;
2770 // EXEC = S0.u64 ^ EXEC;
2771 // SCC = 1 if the new value of EXEC is non-zero.
2772 void
2773 Inst_SOP1__S_XOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2774 {
2775 Wavefront *wf = gpuDynInst->wavefront();
2776 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2777 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2778 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2779
2780 src.read();
2781
2782 sdst = wf->execMask().to_ullong();
2783 wf->execMask() = src.rawData() ^ wf->execMask().to_ullong();
2784 scc = wf->execMask().any() ? 1 : 0;
2785
2786 sdst.write();
2787 scc.write();
2788 }
2789
2790 Inst_SOP1__S_ANDN2_SAVEEXEC_B64::Inst_SOP1__S_ANDN2_SAVEEXEC_B64(
2791 InFmt_SOP1 *iFmt)
2792 : Inst_SOP1(iFmt, "s_andn2_saveexec_b64")
2793 {
2794 setFlag(ALU);
2795 } // Inst_SOP1__S_ANDN2_SAVEEXEC_B64
2796
2797 Inst_SOP1__S_ANDN2_SAVEEXEC_B64::~Inst_SOP1__S_ANDN2_SAVEEXEC_B64()
2798 {
2799 } // ~Inst_SOP1__S_ANDN2_SAVEEXEC_B64
2800
2801 // D.u64 = EXEC;
2802 // EXEC = S0.u64 & ~EXEC;
2803 // SCC = 1 if the new value of EXEC is non-zero.
2804 void
2805 Inst_SOP1__S_ANDN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2806 {
2807 Wavefront *wf = gpuDynInst->wavefront();
2808 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2809 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2810 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2811
2812 src.read();
2813
2814 sdst = wf->execMask().to_ullong();
2815 wf->execMask() = src.rawData() & ~wf->execMask().to_ullong();
2816 scc = wf->execMask().any() ? 1 : 0;
2817
2818 sdst.write();
2819 scc.write();
2820 }
2821
2822 Inst_SOP1__S_ORN2_SAVEEXEC_B64::Inst_SOP1__S_ORN2_SAVEEXEC_B64(
2823 InFmt_SOP1 *iFmt)
2824 : Inst_SOP1(iFmt, "s_orn2_saveexec_b64")
2825 {
2826 setFlag(ALU);
2827 } // Inst_SOP1__S_ORN2_SAVEEXEC_B64
2828
2829 Inst_SOP1__S_ORN2_SAVEEXEC_B64::~Inst_SOP1__S_ORN2_SAVEEXEC_B64()
2830 {
2831 } // ~Inst_SOP1__S_ORN2_SAVEEXEC_B64
2832
2833 // D.u64 = EXEC;
2834 // EXEC = S0.u64 | ~EXEC;
2835 // SCC = 1 if the new value of EXEC is non-zero.
2836 void
2837 Inst_SOP1__S_ORN2_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2838 {
2839 Wavefront *wf = gpuDynInst->wavefront();
2840 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2841 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2842 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2843
2844 src.read();
2845
2846 sdst = wf->execMask().to_ullong();
2847 wf->execMask() = src.rawData() | ~wf->execMask().to_ullong();
2848 scc = wf->execMask().any() ? 1 : 0;
2849
2850 sdst.write();
2851 scc.write();
2852 }
2853
2854 Inst_SOP1__S_NAND_SAVEEXEC_B64::Inst_SOP1__S_NAND_SAVEEXEC_B64(
2855 InFmt_SOP1 *iFmt)
2856 : Inst_SOP1(iFmt, "s_nand_saveexec_b64")
2857 {
2858 setFlag(ALU);
2859 } // Inst_SOP1__S_NAND_SAVEEXEC_B64
2860
2861 Inst_SOP1__S_NAND_SAVEEXEC_B64::~Inst_SOP1__S_NAND_SAVEEXEC_B64()
2862 {
2863 } // ~Inst_SOP1__S_NAND_SAVEEXEC_B64
2864
2865 // D.u64 = EXEC;
2866 // EXEC = ~(S0.u64 & EXEC);
2867 // SCC = 1 if the new value of EXEC is non-zero.
2868 void
2869 Inst_SOP1__S_NAND_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2870 {
2871 Wavefront *wf = gpuDynInst->wavefront();
2872 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2873 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2874 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2875
2876 src.read();
2877
2878 sdst = wf->execMask().to_ullong();
2879 wf->execMask() = ~(src.rawData() & wf->execMask().to_ullong());
2880 scc = wf->execMask().any() ? 1 : 0;
2881
2882 sdst.write();
2883 scc.write();
2884 }
2885
2886 Inst_SOP1__S_NOR_SAVEEXEC_B64::Inst_SOP1__S_NOR_SAVEEXEC_B64(
2887 InFmt_SOP1 *iFmt)
2888 : Inst_SOP1(iFmt, "s_nor_saveexec_b64")
2889 {
2890 setFlag(ALU);
2891 } // Inst_SOP1__S_NOR_SAVEEXEC_B64
2892
2893 Inst_SOP1__S_NOR_SAVEEXEC_B64::~Inst_SOP1__S_NOR_SAVEEXEC_B64()
2894 {
2895 } // ~Inst_SOP1__S_NOR_SAVEEXEC_B64
2896
2897 // D.u64 = EXEC;
2898 // EXEC = ~(S0.u64 | EXEC);
2899 // SCC = 1 if the new value of EXEC is non-zero.
2900 void
2901 Inst_SOP1__S_NOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2902 {
2903 Wavefront *wf = gpuDynInst->wavefront();
2904 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2905 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2906 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2907
2908 src.read();
2909
2910 sdst = wf->execMask().to_ullong();
2911 wf->execMask() = ~(src.rawData() | wf->execMask().to_ullong());
2912 scc = wf->execMask().any() ? 1 : 0;
2913
2914 sdst.write();
2915 scc.write();
2916 }
2917
2918 Inst_SOP1__S_XNOR_SAVEEXEC_B64::Inst_SOP1__S_XNOR_SAVEEXEC_B64(
2919 InFmt_SOP1 *iFmt)
2920 : Inst_SOP1(iFmt, "s_xnor_saveexec_b64")
2921 {
2922 setFlag(ALU);
2923 } // Inst_SOP1__S_XNOR_SAVEEXEC_B64
2924
2925 Inst_SOP1__S_XNOR_SAVEEXEC_B64::~Inst_SOP1__S_XNOR_SAVEEXEC_B64()
2926 {
2927 } // ~Inst_SOP1__S_XNOR_SAVEEXEC_B64
2928
2929 // D.u64 = EXEC;
2930 // EXEC = ~(S0.u64 ^ EXEC);
2931 // SCC = 1 if the new value of EXEC is non-zero.
2932 void
2933 Inst_SOP1__S_XNOR_SAVEEXEC_B64::execute(GPUDynInstPtr gpuDynInst)
2934 {
2935 Wavefront *wf = gpuDynInst->wavefront();
2936 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2937 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2938 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2939
2940 src.read();
2941
2942 sdst = wf->execMask().to_ullong();
2943 wf->execMask() = ~(src.rawData() ^ wf->execMask().to_ullong());
2944 scc = wf->execMask().any() ? 1 : 0;
2945
2946 sdst.write();
2947 scc.write();
2948 }
2949
2950 Inst_SOP1__S_QUADMASK_B32::Inst_SOP1__S_QUADMASK_B32(InFmt_SOP1 *iFmt)
2951 : Inst_SOP1(iFmt, "s_quadmask_b32")
2952 {
2953 setFlag(ALU);
2954 } // Inst_SOP1__S_QUADMASK_B32
2955
2956 Inst_SOP1__S_QUADMASK_B32::~Inst_SOP1__S_QUADMASK_B32()
2957 {
2958 } // ~Inst_SOP1__S_QUADMASK_B32
2959
2960 // D.u = QuadMask(S0.u):
2961 // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[31:8] = 0;
2962 // SCC = 1 if result is non-zero.
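// Example (assuming quadMask() matches the spec above):
// quadMask(0x000000f0) == 0x2, since only the second quad is non-zero.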
2963 void
2964 Inst_SOP1__S_QUADMASK_B32::execute(GPUDynInstPtr gpuDynInst)
2965 {
2966 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
2967 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
2968 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2969
2970 src.read();
2971
2972 sdst = quadMask(src.rawData());
2973 scc = sdst.rawData() ? 1 : 0;
2974
2975 sdst.write();
2976 scc.write();
2977 }
2978
2979 Inst_SOP1__S_QUADMASK_B64::Inst_SOP1__S_QUADMASK_B64(InFmt_SOP1 *iFmt)
2980 : Inst_SOP1(iFmt, "s_quadmask_b64")
2981 {
2982 setFlag(ALU);
2983 } // Inst_SOP1__S_QUADMASK_B64
2984
2985 Inst_SOP1__S_QUADMASK_B64::~Inst_SOP1__S_QUADMASK_B64()
2986 {
2987 } // ~Inst_SOP1__S_QUADMASK_B64
2988
2989 // D.u64 = QuadMask(S0.u64):
2990 // D[0] = OR(S0[3:0]), D[1] = OR(S0[7:4]) ... D[63:16] = 0;
2991 // SCC = 1 if result is non-zero.
2992 void
2993 Inst_SOP1__S_QUADMASK_B64::execute(GPUDynInstPtr gpuDynInst)
2994 {
2995 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
2996 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
2997 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
2998
2999 src.read();
3000
3001 sdst = quadMask(src.rawData());
3002 scc = sdst.rawData() ? 1 : 0;
3003
3004 sdst.write();
3005 scc.write();
3006 }
3007
3008 Inst_SOP1__S_MOVRELS_B32::Inst_SOP1__S_MOVRELS_B32(InFmt_SOP1 *iFmt)
3009 : Inst_SOP1(iFmt, "s_movrels_b32")
3010 {
3011 setFlag(ALU);
3012 } // Inst_SOP1__S_MOVRELS_B32
3013
3014 Inst_SOP1__S_MOVRELS_B32::~Inst_SOP1__S_MOVRELS_B32()
3015 {
3016 } // ~Inst_SOP1__S_MOVRELS_B32
3017
3018 // D.u = SGPR[S0.u + M0.u].u (move from relative source).
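// For example, with M0 == 3, "s_movrels_b32 s5, s0" reads s3
// (SGPR index 0 + 3) into s5.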
3019 void
3020 Inst_SOP1__S_MOVRELS_B32::execute(GPUDynInstPtr gpuDynInst)
3021 {
3022 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3023 m0.read();
3024 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0 + m0.rawData());
3025 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
3026
3027 src.read();
3028
3029 sdst = src.rawData();
3030
3031 sdst.write();
3032 }
3033
3034 Inst_SOP1__S_MOVRELS_B64::Inst_SOP1__S_MOVRELS_B64(InFmt_SOP1 *iFmt)
3035 : Inst_SOP1(iFmt, "s_movrels_b64")
3036 {
3037 setFlag(ALU);
3038 } // Inst_SOP1__S_MOVRELS_B64
3039
3040 Inst_SOP1__S_MOVRELS_B64::~Inst_SOP1__S_MOVRELS_B64()
3041 {
3042 } // ~Inst_SOP1__S_MOVRELS_B64
3043
3044 // D.u64 = SGPR[S0.u + M0.u].u64 (move from relative source).
3045 // The index in M0.u must be even for this operation.
3046 void
3047 Inst_SOP1__S_MOVRELS_B64::execute(GPUDynInstPtr gpuDynInst)
3048 {
3049 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3050 m0.read();
3051 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0 + m0.rawData());
3052 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
3053
3054 src.read();
3055
3056 sdst = src.rawData();
3057
3058 sdst.write();
3059 }
3060
3061 Inst_SOP1__S_MOVRELD_B32::Inst_SOP1__S_MOVRELD_B32(InFmt_SOP1 *iFmt)
3062 : Inst_SOP1(iFmt, "s_movreld_b32")
3063 {
3064 setFlag(ALU);
3065 } // Inst_SOP1__S_MOVRELD_B32
3066
3067 Inst_SOP1__S_MOVRELD_B32::~Inst_SOP1__S_MOVRELD_B32()
3068 {
3069 } // ~Inst_SOP1__S_MOVRELD_B32
3070
3071 // SGPR[D.u + M0.u].u = S0.u (move to relative destination).
3072 void
3073 Inst_SOP1__S_MOVRELD_B32::execute(GPUDynInstPtr gpuDynInst)
3074 {
3075 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3076 m0.read();
3077 ConstScalarOperandU32 src(gpuDynInst, instData.SSRC0);
3078 ScalarOperandU32 sdst(gpuDynInst, instData.SDST + m0.rawData());
3079
3080 src.read();
3081
3082 sdst = src.rawData();
3083
3084 sdst.write();
3085 }
3086
3087 Inst_SOP1__S_MOVRELD_B64::Inst_SOP1__S_MOVRELD_B64(InFmt_SOP1 *iFmt)
3088 : Inst_SOP1(iFmt, "s_movreld_b64")
3089 {
3090 setFlag(ALU);
3091 } // Inst_SOP1__S_MOVRELD_B64
3092
3093 Inst_SOP1__S_MOVRELD_B64::~Inst_SOP1__S_MOVRELD_B64()
3094 {
3095 } // ~Inst_SOP1__S_MOVRELD_B64
3096
3097 // SGPR[D.u + M0.u].u64 = S0.u64 (move to relative destination).
3098 // The index in M0.u must be even for this operation.
3099 void
3100 Inst_SOP1__S_MOVRELD_B64::execute(GPUDynInstPtr gpuDynInst)
3101 {
3102 ConstScalarOperandU32 m0(gpuDynInst, REG_M0);
3103 m0.read();
3104 ConstScalarOperandU64 src(gpuDynInst, instData.SSRC0);
3105 ScalarOperandU64 sdst(gpuDynInst, instData.SDST + m0.rawData());
3106
3107 src.read();
3108
3109 sdst = src.rawData();
3110
3111 sdst.write();
3112 }
3113
3114 Inst_SOP1__S_CBRANCH_JOIN::Inst_SOP1__S_CBRANCH_JOIN(InFmt_SOP1 *iFmt)
3115 : Inst_SOP1(iFmt, "s_cbranch_join")
3116 {
3117 setFlag(Branch);
3118 } // Inst_SOP1__S_CBRANCH_JOIN
3119
3120 Inst_SOP1__S_CBRANCH_JOIN::~Inst_SOP1__S_CBRANCH_JOIN()
3121 {
3122 } // ~Inst_SOP1__S_CBRANCH_JOIN
3123
3124 // Conditional branch join point (end of conditional branch block).
3125 void
3126 Inst_SOP1__S_CBRANCH_JOIN::execute(GPUDynInstPtr gpuDynInst)
3127 {
3128 panicUnimplemented();
3129 }
3130
3131 Inst_SOP1__S_ABS_I32::Inst_SOP1__S_ABS_I32(InFmt_SOP1 *iFmt)
3132 : Inst_SOP1(iFmt, "s_abs_i32")
3133 {
3134 setFlag(ALU);
3135 } // Inst_SOP1__S_ABS_I32
3136
3137 Inst_SOP1__S_ABS_I32::~Inst_SOP1__S_ABS_I32()
3138 {
3139 } // ~Inst_SOP1__S_ABS_I32
3140
3141 // if (S0.i < 0) then D.i = -S0.i;
3142 // else D.i = S0.i;
3143 // SCC = 1 if result is non-zero.
3144 // Integer absolute value.
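// Note: std::abs() is undefined for INT32_MIN, for which hardware is
// assumed to return 0x80000000; this model relies on that input not
// occurring.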
3145 void
3146 Inst_SOP1__S_ABS_I32::execute(GPUDynInstPtr gpuDynInst)
3147 {
3148 ConstScalarOperandI32 src(gpuDynInst, instData.SSRC0);
3149 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
3150 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3151
3152 src.read();
3153
3154 sdst = std::abs(src.rawData());
3155
3156 scc = sdst.rawData() ? 1 : 0;
3157
3158 sdst.write();
3159 scc.write();
3160 }
3161
3162 Inst_SOP1__S_MOV_FED_B32::Inst_SOP1__S_MOV_FED_B32(InFmt_SOP1 *iFmt)
3163 : Inst_SOP1(iFmt, "s_mov_fed_b32")
3164 {
3165 setFlag(ALU);
3166 } // Inst_SOP1__S_MOV_FED_B32
3167
3168 Inst_SOP1__S_MOV_FED_B32::~Inst_SOP1__S_MOV_FED_B32()
3169 {
3170 } // ~Inst_SOP1__S_MOV_FED_B32
3171
3172 // D.u = S0.u.
3173 void
3174 Inst_SOP1__S_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
3175 {
3176 panicUnimplemented();
3177 }
3178
3179 Inst_SOP1__S_SET_GPR_IDX_IDX::Inst_SOP1__S_SET_GPR_IDX_IDX(
3180 InFmt_SOP1 *iFmt)
3181 : Inst_SOP1(iFmt, "s_set_gpr_idx_idx")
3182 {
3183 } // Inst_SOP1__S_SET_GPR_IDX_IDX
3184
3185 Inst_SOP1__S_SET_GPR_IDX_IDX::~Inst_SOP1__S_SET_GPR_IDX_IDX()
3186 {
3187 } // ~Inst_SOP1__S_SET_GPR_IDX_IDX
3188
3189 // M0[7:0] = S0.u[7:0].
3190 // Modify the index used in vector GPR indexing.
3191 void
3192 Inst_SOP1__S_SET_GPR_IDX_IDX::execute(GPUDynInstPtr gpuDynInst)
3193 {
3194 panicUnimplemented();
3195 }
3196
3197 Inst_SOPC__S_CMP_EQ_I32::Inst_SOPC__S_CMP_EQ_I32(InFmt_SOPC *iFmt)
3198 : Inst_SOPC(iFmt, "s_cmp_eq_i32")
3199 {
3200 setFlag(ALU);
3201 } // Inst_SOPC__S_CMP_EQ_I32
3202
3203 Inst_SOPC__S_CMP_EQ_I32::~Inst_SOPC__S_CMP_EQ_I32()
3204 {
3205 } // ~Inst_SOPC__S_CMP_EQ_I32
3206
3207 // SCC = (S0.i == S1.i).
3208 void
3209 Inst_SOPC__S_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
3210 {
3211 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3212 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3213 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3214
3215 src0.read();
3216 src1.read();
3217
3218 scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
3219
3220 scc.write();
3221 }
3222
3223 Inst_SOPC__S_CMP_LG_I32::Inst_SOPC__S_CMP_LG_I32(InFmt_SOPC *iFmt)
3224 : Inst_SOPC(iFmt, "s_cmp_lg_i32")
3225 {
3226 setFlag(ALU);
3227 } // Inst_SOPC__S_CMP_LG_I32
3228
3229 Inst_SOPC__S_CMP_LG_I32::~Inst_SOPC__S_CMP_LG_I32()
3230 {
3231 } // ~Inst_SOPC__S_CMP_LG_I32
3232
3233 // SCC = (S0.i != S1.i).
3234 void
3235 Inst_SOPC__S_CMP_LG_I32::execute(GPUDynInstPtr gpuDynInst)
3236 {
3237 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3238 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3239 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3240
3241 src0.read();
3242 src1.read();
3243
3244 scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
3245
3246 scc.write();
3247 }
3248
3249 Inst_SOPC__S_CMP_GT_I32::Inst_SOPC__S_CMP_GT_I32(InFmt_SOPC *iFmt)
3250 : Inst_SOPC(iFmt, "s_cmp_gt_i32")
3251 {
3252 setFlag(ALU);
3253 } // Inst_SOPC__S_CMP_GT_I32
3254
3255 Inst_SOPC__S_CMP_GT_I32::~Inst_SOPC__S_CMP_GT_I32()
3256 {
3257 } // ~Inst_SOPC__S_CMP_GT_I32
3258
3259 // SCC = (S0.i > S1.i).
3260 void
3261 Inst_SOPC__S_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
3262 {
3263 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3264 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3265 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3266
3267 src0.read();
3268 src1.read();
3269
3270 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
3271
3272 scc.write();
3273 }
3274
3275 Inst_SOPC__S_CMP_GE_I32::Inst_SOPC__S_CMP_GE_I32(InFmt_SOPC *iFmt)
3276 : Inst_SOPC(iFmt, "s_cmp_ge_i32")
3277 {
3278 setFlag(ALU);
3279 } // Inst_SOPC__S_CMP_GE_I32
3280
3281 Inst_SOPC__S_CMP_GE_I32::~Inst_SOPC__S_CMP_GE_I32()
3282 {
3283 } // ~Inst_SOPC__S_CMP_GE_I32
3284
3285 // SCC = (S0.i >= S1.i).
3286 void
3287 Inst_SOPC__S_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
3288 {
3289 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3290 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3291 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3292
3293 src0.read();
3294 src1.read();
3295
3296 scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;
3297
3298 scc.write();
3299 }
3300
3301 Inst_SOPC__S_CMP_LT_I32::Inst_SOPC__S_CMP_LT_I32(InFmt_SOPC *iFmt)
3302 : Inst_SOPC(iFmt, "s_cmp_lt_i32")
3303 {
3304 setFlag(ALU);
3305 } // Inst_SOPC__S_CMP_LT_I32
3306
3307 Inst_SOPC__S_CMP_LT_I32::~Inst_SOPC__S_CMP_LT_I32()
3308 {
3309 } // ~Inst_SOPC__S_CMP_LT_I32
3310
3311 // SCC = (S0.i < S1.i).
3312 void
3313 Inst_SOPC__S_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
3314 {
3315 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3316 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3317 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3318
3319 src0.read();
3320 src1.read();
3321
3322 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
3323
3324 scc.write();
3325 }
3326
3327 Inst_SOPC__S_CMP_LE_I32::Inst_SOPC__S_CMP_LE_I32(InFmt_SOPC *iFmt)
3328 : Inst_SOPC(iFmt, "s_cmp_le_i32")
3329 {
3330 setFlag(ALU);
3331 } // Inst_SOPC__S_CMP_LE_I32
3332
3333 Inst_SOPC__S_CMP_LE_I32::~Inst_SOPC__S_CMP_LE_I32()
3334 {
3335 } // ~Inst_SOPC__S_CMP_LE_I32
3336
3337 // SCC = (S0.i <= S1.i).
3338 void
3339 Inst_SOPC__S_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
3340 {
3341 ConstScalarOperandI32 src0(gpuDynInst, instData.SSRC0);
3342 ConstScalarOperandI32 src1(gpuDynInst, instData.SSRC1);
3343 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3344
3345 src0.read();
3346 src1.read();
3347
3348 scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;
3349
3350 scc.write();
3351 }
3352
3353 Inst_SOPC__S_CMP_EQ_U32::Inst_SOPC__S_CMP_EQ_U32(InFmt_SOPC *iFmt)
3354 : Inst_SOPC(iFmt, "s_cmp_eq_u32")
3355 {
3356 setFlag(ALU);
3357 } // Inst_SOPC__S_CMP_EQ_U32
3358
3359 Inst_SOPC__S_CMP_EQ_U32::~Inst_SOPC__S_CMP_EQ_U32()
3360 {
3361 } // ~Inst_SOPC__S_CMP_EQ_U32
3362
3363 // SCC = (S0.u == S1.u).
3364 void
3365 Inst_SOPC__S_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
3366 {
3367 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3368 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3369 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3370
3371 src0.read();
3372 src1.read();
3373
3374 scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
3375
3376 scc.write();
3377 }
3378
3379 Inst_SOPC__S_CMP_LG_U32::Inst_SOPC__S_CMP_LG_U32(InFmt_SOPC *iFmt)
3380 : Inst_SOPC(iFmt, "s_cmp_lg_u32")
3381 {
3382 setFlag(ALU);
3383 } // Inst_SOPC__S_CMP_LG_U32
3384
3385 Inst_SOPC__S_CMP_LG_U32::~Inst_SOPC__S_CMP_LG_U32()
3386 {
3387 } // ~Inst_SOPC__S_CMP_LG_U32
3388
3389 // SCC = (S0.u != S1.u).
3390 void
3391 Inst_SOPC__S_CMP_LG_U32::execute(GPUDynInstPtr gpuDynInst)
3392 {
3393 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3394 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3395 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3396
3397 src0.read();
3398 src1.read();
3399
3400 scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
3401
3402 scc.write();
3403 }
3404
3405 Inst_SOPC__S_CMP_GT_U32::Inst_SOPC__S_CMP_GT_U32(InFmt_SOPC *iFmt)
3406 : Inst_SOPC(iFmt, "s_cmp_gt_u32")
3407 {
3408 setFlag(ALU);
3409 } // Inst_SOPC__S_CMP_GT_U32
3410
3411 Inst_SOPC__S_CMP_GT_U32::~Inst_SOPC__S_CMP_GT_U32()
3412 {
3413 } // ~Inst_SOPC__S_CMP_GT_U32
3414
3415 // SCC = (S0.u > S1.u).
3416 void
3417 Inst_SOPC__S_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
3418 {
3419 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3420 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3421 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3422
3423 src0.read();
3424 src1.read();
3425
3426 scc = (src0.rawData() > src1.rawData()) ? 1 : 0;
3427
3428 scc.write();
3429 }
3430
3431 Inst_SOPC__S_CMP_GE_U32::Inst_SOPC__S_CMP_GE_U32(InFmt_SOPC *iFmt)
3432 : Inst_SOPC(iFmt, "s_cmp_ge_u32")
3433 {
3434 setFlag(ALU);
3435 } // Inst_SOPC__S_CMP_GE_U32
3436
3437 Inst_SOPC__S_CMP_GE_U32::~Inst_SOPC__S_CMP_GE_U32()
3438 {
3439 } // ~Inst_SOPC__S_CMP_GE_U32
3440
3441 // SCC = (S0.u >= S1.u).
3442 void
3443 Inst_SOPC__S_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
3444 {
3445 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3446 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3447 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3448
3449 src0.read();
3450 src1.read();
3451
3452 scc = (src0.rawData() >= src1.rawData()) ? 1 : 0;
3453
3454 scc.write();
3455 }
3456
3457 Inst_SOPC__S_CMP_LT_U32::Inst_SOPC__S_CMP_LT_U32(InFmt_SOPC *iFmt)
3458 : Inst_SOPC(iFmt, "s_cmp_lt_u32")
3459 {
3460 setFlag(ALU);
3461 } // Inst_SOPC__S_CMP_LT_U32
3462
3463 Inst_SOPC__S_CMP_LT_U32::~Inst_SOPC__S_CMP_LT_U32()
3464 {
3465 } // ~Inst_SOPC__S_CMP_LT_U32
3466
3467 // SCC = (S0.u < S1.u).
3468 void
3469 Inst_SOPC__S_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
3470 {
3471 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3472 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3473 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3474
3475 src0.read();
3476 src1.read();
3477
3478 scc = (src0.rawData() < src1.rawData()) ? 1 : 0;
3479
3480 scc.write();
3481 }
3482
3483 Inst_SOPC__S_CMP_LE_U32::Inst_SOPC__S_CMP_LE_U32(InFmt_SOPC *iFmt)
3484 : Inst_SOPC(iFmt, "s_cmp_le_u32")
3485 {
3486 setFlag(ALU);
3487 } // Inst_SOPC__S_CMP_LE_U32
3488
3489 Inst_SOPC__S_CMP_LE_U32::~Inst_SOPC__S_CMP_LE_U32()
3490 {
3491 } // ~Inst_SOPC__S_CMP_LE_U32
3492
3493 // SCC = (S0.u <= S1.u).
3494 void
3495 Inst_SOPC__S_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
3496 {
3497 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3498 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3499 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3500
3501 src0.read();
3502 src1.read();
3503
3504 scc = (src0.rawData() <= src1.rawData()) ? 1 : 0;
3505
3506 scc.write();
3507 }
3508
3509 Inst_SOPC__S_BITCMP0_B32::Inst_SOPC__S_BITCMP0_B32(InFmt_SOPC *iFmt)
3510 : Inst_SOPC(iFmt, "s_bitcmp0_b32")
3511 {
3512 setFlag(ALU);
3513 } // Inst_SOPC__S_BITCMP0_B32
3514
3515 Inst_SOPC__S_BITCMP0_B32::~Inst_SOPC__S_BITCMP0_B32()
3516 {
3517 } // ~Inst_SOPC__S_BITCMP0_B32
3518
3519 // SCC = (S0.u[S1.u[4:0]] == 0).
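// For example, with S0 == 0x8 and S1 == 3, bit 3 of S0 is 1, so
// SCC = 0.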
3520 void
3521 Inst_SOPC__S_BITCMP0_B32::execute(GPUDynInstPtr gpuDynInst)
3522 {
3523 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3524 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3525 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3526
3527 src0.read();
3528 src1.read();
3529
3530 scc = !bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;
3531
3532 scc.write();
3533 }
3534
3535 Inst_SOPC__S_BITCMP1_B32::Inst_SOPC__S_BITCMP1_B32(InFmt_SOPC *iFmt)
3536 : Inst_SOPC(iFmt, "s_bitcmp1_b32")
3537 {
3538 setFlag(ALU);
3539 } // Inst_SOPC__S_BITCMP1_B32
3540
3541 Inst_SOPC__S_BITCMP1_B32::~Inst_SOPC__S_BITCMP1_B32()
3542 {
3543 } // ~Inst_SOPC__S_BITCMP1_B32
3544
3545 // SCC = (S0.u[S1.u[4:0]] == 1).
3546 void
3547 Inst_SOPC__S_BITCMP1_B32::execute(GPUDynInstPtr gpuDynInst)
3548 {
3549 ConstScalarOperandU32 src0(gpuDynInst, instData.SSRC0);
3550 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3551 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3552
3553 src0.read();
3554 src1.read();
3555
3556 scc = bits(src0.rawData(), bits(src1.rawData(), 4, 0)) ? 1 : 0;
3557
3558 scc.write();
3559 }
3560
3561 Inst_SOPC__S_BITCMP0_B64::Inst_SOPC__S_BITCMP0_B64(InFmt_SOPC *iFmt)
3562 : Inst_SOPC(iFmt, "s_bitcmp0_b64")
3563 {
3564 setFlag(ALU);
3565 } // Inst_SOPC__S_BITCMP0_B64
3566
3567 Inst_SOPC__S_BITCMP0_B64::~Inst_SOPC__S_BITCMP0_B64()
3568 {
3569 } // ~Inst_SOPC__S_BITCMP0_B64
3570
3571 // SCC = (S0.u64[S1.u[5:0]] == 0).
3572 void
3573 Inst_SOPC__S_BITCMP0_B64::execute(GPUDynInstPtr gpuDynInst)
3574 {
3575 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
3576 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3577 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3578
3579 src0.read();
3580 src1.read();
3581
3582 scc = !bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;
3583
3584 scc.write();
3585 }
3586
3587 Inst_SOPC__S_BITCMP1_B64::Inst_SOPC__S_BITCMP1_B64(InFmt_SOPC *iFmt)
3588 : Inst_SOPC(iFmt, "s_bitcmp1_b64")
3589 {
3590 setFlag(ALU);
3591 } // Inst_SOPC__S_BITCMP1_B64
3592
3593 Inst_SOPC__S_BITCMP1_B64::~Inst_SOPC__S_BITCMP1_B64()
3594 {
3595 } // ~Inst_SOPC__S_BITCMP1_B64
3596
3597 // SCC = (S0.u64[S1.u[5:0]] == 1).
3598 void
3599 Inst_SOPC__S_BITCMP1_B64::execute(GPUDynInstPtr gpuDynInst)
3600 {
3601 ConstScalarOperandU64 src0(gpuDynInst, instData.SSRC0);
3602 ConstScalarOperandU32 src1(gpuDynInst, instData.SSRC1);
3603 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3604
3605 src0.read();
3606 src1.read();
3607
3608 scc = bits(src0.rawData(), bits(src1.rawData(), 5, 0)) ? 1 : 0;
3609
3610 scc.write();
3611 }
3612
3613 Inst_SOPC__S_SETVSKIP::Inst_SOPC__S_SETVSKIP(InFmt_SOPC *iFmt)
3614 : Inst_SOPC(iFmt, "s_setvskip")
3615 {
3616 setFlag(UnconditionalJump);
3617 } // Inst_SOPC__S_SETVSKIP
3618
3619 Inst_SOPC__S_SETVSKIP::~Inst_SOPC__S_SETVSKIP()
3620 {
3621 } // ~Inst_SOPC__S_SETVSKIP
3622
3623 // VSKIP = S0.u[S1.u[4:0]].
3624 // Enables or disables VSKIP mode.
3625 // When VSKIP is enabled, no VOP*/M*BUF/MIMG/DS/FLAT/EXP instructions are
3626 // issued.
3627 void
3628 Inst_SOPC__S_SETVSKIP::execute(GPUDynInstPtr gpuDynInst)
3629 {
3630 panicUnimplemented();
3631 }
3632
3633 Inst_SOPC__S_SET_GPR_IDX_ON::Inst_SOPC__S_SET_GPR_IDX_ON(InFmt_SOPC *iFmt)
3634 : Inst_SOPC(iFmt, "s_set_gpr_idx_on")
3635 {
3636 } // Inst_SOPC__S_SET_GPR_IDX_ON
3637
3638 Inst_SOPC__S_SET_GPR_IDX_ON::~Inst_SOPC__S_SET_GPR_IDX_ON()
3639 {
3640 } // ~Inst_SOPC__S_SET_GPR_IDX_ON
3641
3642 // MODE.gpr_idx_en = 1;
3643 // M0[7:0] = S0.u[7:0];
3644 // M0[15:12] = SIMM4 (direct contents of S1 field);
3645 // Remaining bits of M0 are unmodified.
3646 // Enable GPR indexing mode. Vector operations after this will perform
3647 // relative GPR addressing based on the contents of M0.
3648 // The raw contents of the S1 field are read and used to set the enable
3649 // bits. S1[0] = VSRC0_REL, S1[1] = VSRC1_REL, S1[2] = VSRC2_REL and
3650 // S1[3] = VDST_REL.
3651 void
3652 Inst_SOPC__S_SET_GPR_IDX_ON::execute(GPUDynInstPtr gpuDynInst)
3653 {
3654 panicUnimplemented();
3655 }
3656
3657 Inst_SOPC__S_CMP_EQ_U64::Inst_SOPC__S_CMP_EQ_U64(InFmt_SOPC *iFmt)
3658 : Inst_SOPC(iFmt, "s_cmp_eq_u64")
3659 {
3660 setFlag(ALU);
3661 } // Inst_SOPC__S_CMP_EQ_U64
3662
3663 Inst_SOPC__S_CMP_EQ_U64::~Inst_SOPC__S_CMP_EQ_U64()
3664 {
3665 } // ~Inst_SOPC__S_CMP_EQ_U64
3666
3667 // SCC = (S0.u64 == S1.u64).
3668 void
3669 Inst_SOPC__S_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
3670 {
3671 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
3672 ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
3673 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3674
3675 src0.read();
3676 src1.read();
3677
3678 scc = (src0.rawData() == src1.rawData()) ? 1 : 0;
3679
3680 scc.write();
3681 }
3682
3683 Inst_SOPC__S_CMP_LG_U64::Inst_SOPC__S_CMP_LG_U64(InFmt_SOPC *iFmt)
3684 : Inst_SOPC(iFmt, "s_cmp_lg_u64")
3685 {
3686 setFlag(ALU);
3687 } // Inst_SOPC__S_CMP_LG_U64
3688
3689 Inst_SOPC__S_CMP_LG_U64::~Inst_SOPC__S_CMP_LG_U64()
3690 {
3691 } // ~Inst_SOPC__S_CMP_LG_U64
3692
3693 // SCC = (S0.u64 != S1.u64).
3694 void
3695 Inst_SOPC__S_CMP_LG_U64::execute(GPUDynInstPtr gpuDynInst)
3696 {
3697 ConstScalarOperandI64 src0(gpuDynInst, instData.SSRC0);
3698 ConstScalarOperandI64 src1(gpuDynInst, instData.SSRC1);
3699 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
3700
3701 src0.read();
3702 src1.read();
3703
3704 scc = (src0.rawData() != src1.rawData()) ? 1 : 0;
3705
3706 scc.write();
3707 }
3708
3709 Inst_SOPP__S_NOP::Inst_SOPP__S_NOP(InFmt_SOPP *iFmt)
3710 : Inst_SOPP(iFmt, "s_nop")
3711 {
3712 setFlag(Nop);
3713 } // Inst_SOPP__S_NOP
3714
3715 Inst_SOPP__S_NOP::~Inst_SOPP__S_NOP()
3716 {
3717 } // ~Inst_SOPP__S_NOP
3718
3719 // Do nothing.
3720 void
3721 Inst_SOPP__S_NOP::execute(GPUDynInstPtr gpuDynInst)
3722 {
3723 }
3724
3725 Inst_SOPP__S_ENDPGM::Inst_SOPP__S_ENDPGM(InFmt_SOPP *iFmt)
3726 : Inst_SOPP(iFmt, "s_endpgm")
3727 {
3728 setFlag(EndOfKernel);
3729 } // Inst_SOPP__S_ENDPGM
3730
3731 Inst_SOPP__S_ENDPGM::~Inst_SOPP__S_ENDPGM()
3732 {
3733 } // ~Inst_SOPP__S_ENDPGM
3734
3735 // End of program; terminate wavefront.
3736 void
3737 Inst_SOPP__S_ENDPGM::execute(GPUDynInstPtr gpuDynInst)
3738 {
3739 Wavefront *wf = gpuDynInst->wavefront();
3740 ComputeUnit *cu = gpuDynInst->computeUnit();
3741
3742 // delete extra instructions fetched for completed work-items
3743 wf->instructionBuffer.erase(wf->instructionBuffer.begin() + 1,
3744 wf->instructionBuffer.end());
3745
3746 if (wf->pendingFetch) {
3747 wf->dropFetch = true;
3748 }
3749
3750 wf->computeUnit->fetchStage.fetchUnit(wf->simdId)
3751 .flushBuf(wf->wfSlotId);
3752 wf->setStatus(Wavefront::S_STOPPED);
3753
3754 int refCount = wf->computeUnit->getLds()
3755 .decreaseRefCounter(wf->dispatchId, wf->wgId);
3756
3757 /**
3758 * The parent WF of this instruction is exiting, therefore
3759 * it should not participate in this barrier any longer. This
3760 * prevents possible deadlock issues if WFs exit early.
3761 */
3762 int bar_id = WFBarrier::InvalidID;
3763 if (wf->hasBarrier()) {
3764 assert(wf->getStatus() != Wavefront::S_BARRIER);
3765 bar_id = wf->barrierId();
3766 assert(bar_id != WFBarrier::InvalidID);
3767 wf->releaseBarrier();
3768 cu->decMaxBarrierCnt(bar_id);
3769 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Exiting the "
3770 "program and decrementing max barrier count for "
3771 "barrier Id%d. New max count: %d.\n", cu->cu_id,
3772 wf->simdId, wf->wfSlotId, wf->wfDynId, bar_id,
3773 cu->maxBarrierCnt(bar_id));
3774 }
3775
3776 DPRINTF(GPUExec, "CU%d: decrease ref ctr WG[%d] to [%d]\n",
3777 wf->computeUnit->cu_id, wf->wgId, refCount);
3778
3779 wf->computeUnit->registerManager->freeRegisters(wf);
3780 wf->computeUnit->completedWfs++;
3781 wf->computeUnit->activeWaves--;
3782
3783 panic_if(wf->computeUnit->activeWaves < 0, "CU[%d] Active waves less "
3784 "than zero\n", wf->computeUnit->cu_id);
3785
3786 DPRINTF(GPUExec, "Doing return for CU%d: WF[%d][%d][%d]\n",
3787 wf->computeUnit->cu_id, wf->simdId, wf->wfSlotId, wf->wfDynId);
3788
3789 for (int i = 0; i < wf->vecReads.size(); i++) {
3790 if (wf->rawDist.find(i) != wf->rawDist.end()) {
3791 wf->readsPerWrite.sample(wf->vecReads.at(i));
3792 }
3793 }
3794 wf->vecReads.clear();
3795 wf->rawDist.clear();
3796 wf->lastInstExec = 0;
3797
3798 if (!refCount) {
3799 /**
3800 * If all WFs have finished, and hence the WG has finished,
3801 * then we can free up the barrier belonging to the parent
3802 * WG, but only if we actually used a barrier (i.e., more
3803 * than one WF in the WG).
3804 */
3805 if (bar_id != WFBarrier::InvalidID) {
3806 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - All waves are "
3807 "now complete. Releasing barrier Id%d.\n", cu->cu_id,
3808 wf->simdId, wf->wfSlotId, wf->wfDynId,
3809 wf->barrierId());
3810 cu->releaseBarrier(bar_id);
3811 }
3812
3813 /**
3814 * Last wavefront of the workgroup has executed return. If the
3815 * workgroup is not the final one in the kernel, then simply
3816 * retire it; however, if it is the final one (i.e., indicating
3817 * the kernel end) then a release operation is needed.
3818 */
3819
3820 // check whether the workgroup is indicating the kernel end (i.e.,
3821 // the last workgroup in the kernel).
3822 bool kernelEnd =
3823 wf->computeUnit->shader->dispatcher().isReachingKernelEnd(wf);
3824 // further check whether 'release @ kernel end' is needed
3825 bool relNeeded =
3826 wf->computeUnit->shader->impl_kern_end_rel;
3827
3828 // if not a kernel end or no release needed, retire the workgroup
3829 // directly
3830 if (!kernelEnd || !relNeeded) {
3831 wf->computeUnit->shader->dispatcher().notifyWgCompl(wf);
3832 wf->setStatus(Wavefront::S_STOPPED);
3833 wf->computeUnit->completedWGs++;
3834
3835 return;
3836 }
3837
3838 /**
3839 * If this is a kernel end and a release is needed, inject a memory
3840 * sync and retire the workgroup after receiving all acks.
3841 */
3842 setFlag(MemSync);
3843 setFlag(GlobalSegment);
3844 // Notify Memory System of Kernel Completion
3845 wf->setStatus(Wavefront::S_RETURNING);
3846 gpuDynInst->simdId = wf->simdId;
3847 gpuDynInst->wfSlotId = wf->wfSlotId;
3848 gpuDynInst->wfDynId = wf->wfDynId;
3849
3850 DPRINTF(GPUExec, "inject global memory fence for CU%d: "
3851 "WF[%d][%d][%d]\n", wf->computeUnit->cu_id,
3852 wf->simdId, wf->wfSlotId, wf->wfDynId);
3853
3854 // call shader to prepare the flush operations
3855 wf->computeUnit->shader->prepareFlush(gpuDynInst);
3856
3857 wf->computeUnit->completedWGs++;
3858 } else {
3859 wf->computeUnit->shader->dispatcher().scheduleDispatch();
3860 }
3861 }
3862
3863
3864 Inst_SOPP__S_BRANCH::Inst_SOPP__S_BRANCH(InFmt_SOPP *iFmt)
3865 : Inst_SOPP(iFmt, "s_branch")
3866 {
3867 setFlag(Branch);
3868 } // Inst_SOPP__S_BRANCH
3869
3870 Inst_SOPP__S_BRANCH::~Inst_SOPP__S_BRANCH()
3871 {
3872 } // ~Inst_SOPP__S_BRANCH
3873
3874 // PC = PC + signext(SIMM16 * 4) + 4 (short jump).
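// For example, SIMM16 == -1 gives PC = PC + (-4) + 4, a branch back to
// this s_branch itself; SIMM16 == 0 falls through to PC + 4.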
3875 void
3876 Inst_SOPP__S_BRANCH::execute(GPUDynInstPtr gpuDynInst)
3877 {
3878 Wavefront *wf = gpuDynInst->wavefront();
3879 Addr pc = wf->pc();
3880 ScalarRegI16 simm16 = instData.SIMM16;
3881
3882 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3883
3884 wf->pc(pc);
3885 }
3886
3887 Inst_SOPP__S_WAKEUP::Inst_SOPP__S_WAKEUP(InFmt_SOPP *iFmt)
3888 : Inst_SOPP(iFmt, "s_wakeup")
3889 {
3890 } // Inst_SOPP__S_WAKEUP
3891
3892 Inst_SOPP__S_WAKEUP::~Inst_SOPP__S_WAKEUP()
3893 {
3894 } // ~Inst_SOPP__S_WAKEUP
3895
3896 // Allow a wave to wake up all the other waves in its workgroup to force
3897 // them to wake up immediately from an S_SLEEP instruction. The wakeup is
3898 // ignored if the waves are not sleeping.
3899 void
3900 Inst_SOPP__S_WAKEUP::execute(GPUDynInstPtr gpuDynInst)
3901 {
3902 panicUnimplemented();
3903 }
3904
3905 Inst_SOPP__S_CBRANCH_SCC0::Inst_SOPP__S_CBRANCH_SCC0(InFmt_SOPP *iFmt)
3906 : Inst_SOPP(iFmt, "s_cbranch_scc0")
3907 {
3908 setFlag(Branch);
3909 } // Inst_SOPP__S_CBRANCH_SCC0
3910
3911 Inst_SOPP__S_CBRANCH_SCC0::~Inst_SOPP__S_CBRANCH_SCC0()
3912 {
3913 } // ~Inst_SOPP__S_CBRANCH_SCC0
3914
3915 // if (SCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
3916 // else NOP.
3917 void
3918 Inst_SOPP__S_CBRANCH_SCC0::execute(GPUDynInstPtr gpuDynInst)
3919 {
3920 Wavefront *wf = gpuDynInst->wavefront();
3921 Addr pc = wf->pc();
3922 ScalarRegI16 simm16 = instData.SIMM16;
3923 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
3924
3925 scc.read();
3926
3927 if (!scc.rawData()) {
3928 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3929 }
3930
3931 wf->pc(pc);
3932 }
3933
3934 Inst_SOPP__S_CBRANCH_SCC1::Inst_SOPP__S_CBRANCH_SCC1(InFmt_SOPP *iFmt)
3935 : Inst_SOPP(iFmt, "s_cbranch_scc1")
3936 {
3937 setFlag(Branch);
3938 } // Inst_SOPP__S_CBRANCH_SCC1
3939
3940 Inst_SOPP__S_CBRANCH_SCC1::~Inst_SOPP__S_CBRANCH_SCC1()
3941 {
3942 } // ~Inst_SOPP__S_CBRANCH_SCC1
3943
3944 // if (SCC == 1) then PC = PC + signext(SIMM16 * 4) + 4;
3945 // else NOP.
3946 void
3947 Inst_SOPP__S_CBRANCH_SCC1::execute(GPUDynInstPtr gpuDynInst)
3948 {
3949 Wavefront *wf = gpuDynInst->wavefront();
3950 Addr pc = wf->pc();
3951 ScalarRegI16 simm16 = instData.SIMM16;
3952 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
3953
3954 scc.read();
3955
3956 if (scc.rawData()) {
3957 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3958 }
3959
3960 wf->pc(pc);
3961 }
3962
3963 Inst_SOPP__S_CBRANCH_VCCZ::Inst_SOPP__S_CBRANCH_VCCZ(InFmt_SOPP *iFmt)
3964 : Inst_SOPP(iFmt, "s_cbranch_vccz")
3965 {
3966 setFlag(Branch);
3967 setFlag(ReadsVCC);
3968 } // Inst_SOPP__S_CBRANCH_VCCZ
3969
3970 Inst_SOPP__S_CBRANCH_VCCZ::~Inst_SOPP__S_CBRANCH_VCCZ()
3971 {
3972 } // ~Inst_SOPP__S_CBRANCH_VCCZ
3973
3974 // if (VCC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
3975 // else NOP.
3976 void
3977 Inst_SOPP__S_CBRANCH_VCCZ::execute(GPUDynInstPtr gpuDynInst)
3978 {
3979 Wavefront *wf = gpuDynInst->wavefront();
3980 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
3981 Addr pc = wf->pc();
3982 ScalarRegI16 simm16 = instData.SIMM16;
3983
3984 vcc.read();
3985
3986 if (!vcc.rawData()) {
3987 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
3988 }
3989
3990 wf->pc(pc);
3991 }
3992
3993 Inst_SOPP__S_CBRANCH_VCCNZ::Inst_SOPP__S_CBRANCH_VCCNZ(InFmt_SOPP *iFmt)
3994 : Inst_SOPP(iFmt, "s_cbranch_vccnz")
3995 {
3996 setFlag(Branch);
3997 setFlag(ReadsVCC);
3998 } // Inst_SOPP__S_CBRANCH_VCCNZ
3999
4000 Inst_SOPP__S_CBRANCH_VCCNZ::~Inst_SOPP__S_CBRANCH_VCCNZ()
4001 {
4002 } // ~Inst_SOPP__S_CBRANCH_VCCNZ
4003
4004 // if (VCC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
4005 // else NOP.
4006 void
4007 Inst_SOPP__S_CBRANCH_VCCNZ::execute(GPUDynInstPtr gpuDynInst)
4008 {
4009 Wavefront *wf = gpuDynInst->wavefront();
4010 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
4011
4012 vcc.read();
4013
4014 if (vcc.rawData()) {
4015 Addr pc = wf->pc();
4016 ScalarRegI16 simm16 = instData.SIMM16;
4017 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
4018 wf->pc(pc);
4019 }
4020 }
4021
4022 Inst_SOPP__S_CBRANCH_EXECZ::Inst_SOPP__S_CBRANCH_EXECZ(InFmt_SOPP *iFmt)
4023 : Inst_SOPP(iFmt, "s_cbranch_execz")
4024 {
4025 setFlag(Branch);
4026 } // Inst_SOPP__S_CBRANCH_EXECZ
4027
4028 Inst_SOPP__S_CBRANCH_EXECZ::~Inst_SOPP__S_CBRANCH_EXECZ()
4029 {
4030 } // ~Inst_SOPP__S_CBRANCH_EXECZ
4031
4032 // if (EXEC == 0) then PC = PC + signext(SIMM16 * 4) + 4;
4033 // else NOP.
4034 void
4035 Inst_SOPP__S_CBRANCH_EXECZ::execute(GPUDynInstPtr gpuDynInst)
4036 {
4037 Wavefront *wf = gpuDynInst->wavefront();
4038
4039 if (wf->execMask().none()) {
4040 Addr pc = wf->pc();
4041 ScalarRegI16 simm16 = instData.SIMM16;
4042 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
4043 wf->pc(pc);
4044 }
4045 }
4046
4047 Inst_SOPP__S_CBRANCH_EXECNZ::Inst_SOPP__S_CBRANCH_EXECNZ(InFmt_SOPP *iFmt)
4048 : Inst_SOPP(iFmt, "s_cbranch_execnz")
4049 {
4050 setFlag(Branch);
4051 } // Inst_SOPP__S_CBRANCH_EXECNZ
4052
4053 Inst_SOPP__S_CBRANCH_EXECNZ::~Inst_SOPP__S_CBRANCH_EXECNZ()
4054 {
4055 } // ~Inst_SOPP__S_CBRANCH_EXECNZ
4056
4057 // if (EXEC != 0) then PC = PC + signext(SIMM16 * 4) + 4;
4058 // else NOP.
4059 void
4060 Inst_SOPP__S_CBRANCH_EXECNZ::execute(GPUDynInstPtr gpuDynInst)
4061 {
4062 Wavefront *wf = gpuDynInst->wavefront();
4063
4064 if (wf->execMask().any()) {
4065 Addr pc = wf->pc();
4066 ScalarRegI16 simm16 = instData.SIMM16;
4067 pc = pc + ((ScalarRegI64)simm16 * 4LL) + 4LL;
4068 wf->pc(pc);
4069 }
4070 }
4071
4072 Inst_SOPP__S_BARRIER::Inst_SOPP__S_BARRIER(InFmt_SOPP *iFmt)
4073 : Inst_SOPP(iFmt, "s_barrier")
4074 {
4075 setFlag(MemBarrier);
4076 } // Inst_SOPP__S_BARRIER
4077
4078 Inst_SOPP__S_BARRIER::~Inst_SOPP__S_BARRIER()
4079 {
4080 } // ~Inst_SOPP__S_BARRIER
4081
4082 /**
4083 * Synchronize waves within a workgroup. If not all waves of the workgroup
4084      * have been created yet, wait for the entire group before proceeding.
4085      * If some waves in the workgroup have already terminated, this waits
4086      * only on the surviving waves.
4087 */
4088 void
4089 Inst_SOPP__S_BARRIER::execute(GPUDynInstPtr gpuDynInst)
4090 {
4091 Wavefront *wf = gpuDynInst->wavefront();
4092 ComputeUnit *cu = gpuDynInst->computeUnit();
4093
4094 if (wf->hasBarrier()) {
4095 int bar_id = wf->barrierId();
4096 assert(wf->getStatus() != Wavefront::S_BARRIER);
4097 wf->setStatus(Wavefront::S_BARRIER);
4098 cu->incNumAtBarrier(bar_id);
4099 DPRINTF(GPUSync, "CU[%d] WF[%d][%d] Wave[%d] - Stalling at "
4100 "barrier Id%d. %d waves now at barrier, %d waves "
4101 "remain.\n", cu->cu_id, wf->simdId, wf->wfSlotId,
4102 wf->wfDynId, bar_id, cu->numAtBarrier(bar_id),
4103 cu->numYetToReachBarrier(bar_id));
4104 }
4105 } // execute
4106 // --- Inst_SOPP__S_SETKILL class methods ---
4107
4108 Inst_SOPP__S_SETKILL::Inst_SOPP__S_SETKILL(InFmt_SOPP *iFmt)
4109 : Inst_SOPP(iFmt, "s_setkill")
4110 {
4111 } // Inst_SOPP__S_SETKILL
4112
4113 Inst_SOPP__S_SETKILL::~Inst_SOPP__S_SETKILL()
4114 {
4115 } // ~Inst_SOPP__S_SETKILL
4116
4117 void
4118 Inst_SOPP__S_SETKILL::execute(GPUDynInstPtr gpuDynInst)
4119 {
4120 panicUnimplemented();
4121 }
4122
4123 Inst_SOPP__S_WAITCNT::Inst_SOPP__S_WAITCNT(InFmt_SOPP *iFmt)
4124 : Inst_SOPP(iFmt, "s_waitcnt")
4125 {
4126 setFlag(ALU);
4127 setFlag(Waitcnt);
4128 } // Inst_SOPP__S_WAITCNT
4129
4130 Inst_SOPP__S_WAITCNT::~Inst_SOPP__S_WAITCNT()
4131 {
4132 } // ~Inst_SOPP__S_WAITCNT
4133
4134 // Wait for the counts of outstanding lds, vector-memory and
4135 // export/vmem-write-data to be at or below the specified levels.
4136 // SIMM16[3:0] = vmcount (vector memory operations),
4137 // SIMM16[6:4] = export/mem-write-data count,
4138 // SIMM16[12:8] = LGKM_cnt (scalar-mem/GDS/LDS count).
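    // A worked decode (illustrative values): s_waitcnt vmcnt(0) lgkmcnt(0)
    // encodes as SIMM16 = 0x0070 -- bits 3:0 = 0 (drain all vector-memory
    // ops), bits 6:4 = 7 (the 3-bit maximum, i.e. do not wait on exports)
    // and bits 12:8 = 0 (drain all scalar-mem/GDS/LDS ops).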
4139 void
4140 Inst_SOPP__S_WAITCNT::execute(GPUDynInstPtr gpuDynInst)
4141 {
4142 ScalarRegI32 vm_cnt = 0;
4143 ScalarRegI32 exp_cnt = 0;
4144 ScalarRegI32 lgkm_cnt = 0;
4145 vm_cnt = bits<ScalarRegI16>(instData.SIMM16, 3, 0);
4146 exp_cnt = bits<ScalarRegI16>(instData.SIMM16, 6, 4);
4147 lgkm_cnt = bits<ScalarRegI16>(instData.SIMM16, 12, 8);
4148 gpuDynInst->wavefront()->setWaitCnts(vm_cnt, exp_cnt, lgkm_cnt);
4149 }
4150
4151 Inst_SOPP__S_SETHALT::Inst_SOPP__S_SETHALT(InFmt_SOPP *iFmt)
4152 : Inst_SOPP(iFmt, "s_sethalt")
4153 {
4154 } // Inst_SOPP__S_SETHALT
4155
4156 Inst_SOPP__S_SETHALT::~Inst_SOPP__S_SETHALT()
4157 {
4158 } // ~Inst_SOPP__S_SETHALT
4159
4160 void
4161 Inst_SOPP__S_SETHALT::execute(GPUDynInstPtr gpuDynInst)
4162 {
4163 panicUnimplemented();
4164 }
4165
4166 Inst_SOPP__S_SLEEP::Inst_SOPP__S_SLEEP(InFmt_SOPP *iFmt)
4167 : Inst_SOPP(iFmt, "s_sleep")
4168 {
4169 } // Inst_SOPP__S_SLEEP
4170
4171 Inst_SOPP__S_SLEEP::~Inst_SOPP__S_SLEEP()
4172 {
4173 } // ~Inst_SOPP__S_SLEEP
4174
4175 // Cause a wave to sleep for (64 * SIMM16[2:0] + 1..64) clocks.
4176 void
4177 Inst_SOPP__S_SLEEP::execute(GPUDynInstPtr gpuDynInst)
4178 {
4179 panicUnimplemented();
4180 }
4181
4182 Inst_SOPP__S_SETPRIO::Inst_SOPP__S_SETPRIO(InFmt_SOPP *iFmt)
4183 : Inst_SOPP(iFmt, "s_setprio")
4184 {
4185 } // Inst_SOPP__S_SETPRIO
4186
4187 Inst_SOPP__S_SETPRIO::~Inst_SOPP__S_SETPRIO()
4188 {
4189 } // ~Inst_SOPP__S_SETPRIO
4190
4191     // User-settable wave priority is set to SIMM16[1:0]. 0 = lowest,
4192 // 3 = highest.
4193 void
4194 Inst_SOPP__S_SETPRIO::execute(GPUDynInstPtr gpuDynInst)
4195 {
4196 panicUnimplemented();
4197 }
4198
4199 Inst_SOPP__S_SENDMSG::Inst_SOPP__S_SENDMSG(InFmt_SOPP *iFmt)
4200 : Inst_SOPP(iFmt, "s_sendmsg")
4201 {
4202 } // Inst_SOPP__S_SENDMSG
4203
4204 Inst_SOPP__S_SENDMSG::~Inst_SOPP__S_SENDMSG()
4205 {
4206 } // ~Inst_SOPP__S_SENDMSG
4207
4208 void
4209 Inst_SOPP__S_SENDMSG::execute(GPUDynInstPtr gpuDynInst)
4210 {
4211 panicUnimplemented();
4212 }
4213
4214 Inst_SOPP__S_SENDMSGHALT::Inst_SOPP__S_SENDMSGHALT(InFmt_SOPP *iFmt)
4215 : Inst_SOPP(iFmt, "s_sendmsghalt")
4216 {
4217 } // Inst_SOPP__S_SENDMSGHALT
4218
4219 Inst_SOPP__S_SENDMSGHALT::~Inst_SOPP__S_SENDMSGHALT()
4220 {
4221 } // ~Inst_SOPP__S_SENDMSGHALT
4222
4223 void
4224 Inst_SOPP__S_SENDMSGHALT::execute(GPUDynInstPtr gpuDynInst)
4225 {
4226 panicUnimplemented();
4227 }
4228
4229 Inst_SOPP__S_TRAP::Inst_SOPP__S_TRAP(InFmt_SOPP *iFmt)
4230 : Inst_SOPP(iFmt, "s_trap")
4231 {
4232 } // Inst_SOPP__S_TRAP
4233
4234 Inst_SOPP__S_TRAP::~Inst_SOPP__S_TRAP()
4235 {
4236 } // ~Inst_SOPP__S_TRAP
4237
4238 // Enter the trap handler.
4239 void
4240 Inst_SOPP__S_TRAP::execute(GPUDynInstPtr gpuDynInst)
4241 {
4242 panicUnimplemented();
4243 }
4244
4245 Inst_SOPP__S_ICACHE_INV::Inst_SOPP__S_ICACHE_INV(InFmt_SOPP *iFmt)
4246 : Inst_SOPP(iFmt, "s_icache_inv")
4247 {
4248 } // Inst_SOPP__S_ICACHE_INV
4249
4250 Inst_SOPP__S_ICACHE_INV::~Inst_SOPP__S_ICACHE_INV()
4251 {
4252 } // ~Inst_SOPP__S_ICACHE_INV
4253
4254 // Invalidate entire L1 instruction cache.
4255 void
4256 Inst_SOPP__S_ICACHE_INV::execute(GPUDynInstPtr gpuDynInst)
4257 {
4258 panicUnimplemented();
4259 }
4260
4261 Inst_SOPP__S_INCPERFLEVEL::Inst_SOPP__S_INCPERFLEVEL(InFmt_SOPP *iFmt)
4262 : Inst_SOPP(iFmt, "s_incperflevel")
4263 {
4264 } // Inst_SOPP__S_INCPERFLEVEL
4265
4266 Inst_SOPP__S_INCPERFLEVEL::~Inst_SOPP__S_INCPERFLEVEL()
4267 {
4268 } // ~Inst_SOPP__S_INCPERFLEVEL
4269
4270 void
4271 Inst_SOPP__S_INCPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
4272 {
4273 panicUnimplemented();
4274 }
4275
4276 Inst_SOPP__S_DECPERFLEVEL::Inst_SOPP__S_DECPERFLEVEL(InFmt_SOPP *iFmt)
4277 : Inst_SOPP(iFmt, "s_decperflevel")
4278 {
4279 } // Inst_SOPP__S_DECPERFLEVEL
4280
4281 Inst_SOPP__S_DECPERFLEVEL::~Inst_SOPP__S_DECPERFLEVEL()
4282 {
4283 } // ~Inst_SOPP__S_DECPERFLEVEL
4284
4285 void
4286 Inst_SOPP__S_DECPERFLEVEL::execute(GPUDynInstPtr gpuDynInst)
4287 {
4288 panicUnimplemented();
4289 }
4290
4291 Inst_SOPP__S_TTRACEDATA::Inst_SOPP__S_TTRACEDATA(InFmt_SOPP *iFmt)
4292 : Inst_SOPP(iFmt, "s_ttracedata")
4293 {
4294 } // Inst_SOPP__S_TTRACEDATA
4295
4296 Inst_SOPP__S_TTRACEDATA::~Inst_SOPP__S_TTRACEDATA()
4297 {
4298 } // ~Inst_SOPP__S_TTRACEDATA
4299
4300 void
4301 Inst_SOPP__S_TTRACEDATA::execute(GPUDynInstPtr gpuDynInst)
4302 {
4303 panicUnimplemented();
4304 }
4305
4306 Inst_SOPP__S_CBRANCH_CDBGSYS::Inst_SOPP__S_CBRANCH_CDBGSYS(
4307 InFmt_SOPP *iFmt)
4308 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys")
4309 {
4310 setFlag(Branch);
4311 } // Inst_SOPP__S_CBRANCH_CDBGSYS
4312
4313 Inst_SOPP__S_CBRANCH_CDBGSYS::~Inst_SOPP__S_CBRANCH_CDBGSYS()
4314 {
4315 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS
4316
4317 void
4318 Inst_SOPP__S_CBRANCH_CDBGSYS::execute(GPUDynInstPtr gpuDynInst)
4319 {
4320 panicUnimplemented();
4321 }
4322
4323 Inst_SOPP__S_CBRANCH_CDBGUSER::Inst_SOPP__S_CBRANCH_CDBGUSER(
4324 InFmt_SOPP *iFmt)
4325 : Inst_SOPP(iFmt, "s_cbranch_cdbguser")
4326 {
4327 setFlag(Branch);
4328 } // Inst_SOPP__S_CBRANCH_CDBGUSER
4329
4330 Inst_SOPP__S_CBRANCH_CDBGUSER::~Inst_SOPP__S_CBRANCH_CDBGUSER()
4331 {
4332 } // ~Inst_SOPP__S_CBRANCH_CDBGUSER
4333
4334 void
4335 Inst_SOPP__S_CBRANCH_CDBGUSER::execute(GPUDynInstPtr gpuDynInst)
4336 {
4337 panicUnimplemented();
4338 }
4339
4340 Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER(
4341 InFmt_SOPP *iFmt)
4342 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_or_user")
4343 {
4344 setFlag(Branch);
4345 } // Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
4346
4347 Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::
4348 ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER()
4349 {
4350 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER
4351
4352 void
4353 Inst_SOPP__S_CBRANCH_CDBGSYS_OR_USER::execute(GPUDynInstPtr gpuDynInst)
4354 {
4355 panicUnimplemented();
4356 }
4357
4358 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
4359 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER(InFmt_SOPP *iFmt)
4360 : Inst_SOPP(iFmt, "s_cbranch_cdbgsys_and_user")
4361 {
4362 setFlag(Branch);
4363 } // Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
4364
4365 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::
4366 ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER()
4367 {
4368 } // ~Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER
4369
4370 void
4371 Inst_SOPP__S_CBRANCH_CDBGSYS_AND_USER::execute(GPUDynInstPtr gpuDynInst)
4372 {
4373 panicUnimplemented();
4374 }
4375
4376 Inst_SOPP__S_ENDPGM_SAVED::Inst_SOPP__S_ENDPGM_SAVED(InFmt_SOPP *iFmt)
4377 : Inst_SOPP(iFmt, "s_endpgm_saved")
4378 {
4379 } // Inst_SOPP__S_ENDPGM_SAVED
4380
4381 Inst_SOPP__S_ENDPGM_SAVED::~Inst_SOPP__S_ENDPGM_SAVED()
4382 {
4383 } // ~Inst_SOPP__S_ENDPGM_SAVED
4384
4385 // End of program.
4386 void
4387 Inst_SOPP__S_ENDPGM_SAVED::execute(GPUDynInstPtr gpuDynInst)
4388 {
4389 panicUnimplemented();
4390 }
4391
4392 Inst_SOPP__S_SET_GPR_IDX_OFF::Inst_SOPP__S_SET_GPR_IDX_OFF(
4393 InFmt_SOPP *iFmt)
4394 : Inst_SOPP(iFmt, "s_set_gpr_idx_off")
4395 {
4396 } // Inst_SOPP__S_SET_GPR_IDX_OFF
4397
4398 Inst_SOPP__S_SET_GPR_IDX_OFF::~Inst_SOPP__S_SET_GPR_IDX_OFF()
4399 {
4400 } // ~Inst_SOPP__S_SET_GPR_IDX_OFF
4401
4402 // MODE.gpr_idx_en = 0.
4403 // Clear GPR indexing mode. Vector operations after this will not perform
4404 // relative GPR addressing regardless of the contents of M0.
4405 void
4406 Inst_SOPP__S_SET_GPR_IDX_OFF::execute(GPUDynInstPtr gpuDynInst)
4407 {
4408 panicUnimplemented();
4409 }
4410
4411 Inst_SOPP__S_SET_GPR_IDX_MODE::Inst_SOPP__S_SET_GPR_IDX_MODE(
4412 InFmt_SOPP *iFmt)
4413 : Inst_SOPP(iFmt, "s_set_gpr_idx_mode")
4414 {
4415 } // Inst_SOPP__S_SET_GPR_IDX_MODE
4416
4417 Inst_SOPP__S_SET_GPR_IDX_MODE::~Inst_SOPP__S_SET_GPR_IDX_MODE()
4418 {
4419 } // ~Inst_SOPP__S_SET_GPR_IDX_MODE
4420
4421 // M0[15:12] = SIMM4.
4422 // Modify the mode used for vector GPR indexing.
4423 // The raw contents of the source field are read and used to set the enable
4424 // bits. SIMM4[0] = VSRC0_REL, SIMM4[1] = VSRC1_REL, SIMM4[2] = VSRC2_REL
4425 // and SIMM4[3] = VDST_REL.
4426 void
4427 Inst_SOPP__S_SET_GPR_IDX_MODE::execute(GPUDynInstPtr gpuDynInst)
4428 {
4429 panicUnimplemented();
4430 }
4431
4432 Inst_SMEM__S_LOAD_DWORD::Inst_SMEM__S_LOAD_DWORD(InFmt_SMEM *iFmt)
4433 : Inst_SMEM(iFmt, "s_load_dword")
4434 {
4435 setFlag(MemoryRef);
4436 setFlag(Load);
4437 } // Inst_SMEM__S_LOAD_DWORD
4438
4439 Inst_SMEM__S_LOAD_DWORD::~Inst_SMEM__S_LOAD_DWORD()
4440 {
4441 } // ~Inst_SMEM__S_LOAD_DWORD
4442
4443 /**
4444 * Read 1 dword from scalar data cache. If the offset is specified as an
4445 * sgpr, the sgpr contains an unsigned byte offset (the 2 LSBs are
4446 * ignored). If the offset is specified as an immediate 20-bit constant,
4447 * the constant is an unsigned byte offset.
4448 */
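    // An illustrative encoding: instData.SBASE counts aligned SGPR pairs, so
    // SBASE = 2 selects s[4:5] as the 64-bit base address (hence the
    // "SBASE << 1" when constructing the operand below); with IMM = 1 and
    // OFFSET = 0x10 the request targets base + 0x10 bytes.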
4449 void
4450 Inst_SMEM__S_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
4451 {
4452 Wavefront *wf = gpuDynInst->wavefront();
4453 gpuDynInst->execUnitId = wf->execUnitId;
4454 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4455 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4456 ScalarRegU32 offset(0);
4457 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4458
4459 addr.read();
4460
4461 if (instData.IMM) {
4462 offset = extData.OFFSET;
4463 } else {
4464 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4465 off_sgpr.read();
4466 offset = off_sgpr.rawData();
4467 }
4468
4469 calcAddr(gpuDynInst, addr, offset);
4470
4471 gpuDynInst->computeUnit()->scalarMemoryPipe
4472 .getGMReqFIFO().push(gpuDynInst);
4473
4474 wf->scalarRdGmReqsInPipe--;
4475 wf->scalarOutstandingReqsRdGm++;
4476 gpuDynInst->wavefront()->outstandingReqs++;
4477 gpuDynInst->wavefront()->validateRequestCounters();
4478 }
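    // The counter updates above follow the pattern shared by all SMEM
    // accesses in this file: the in-pipe count (presumably incremented
    // earlier in the issue path) drops once the request is handed to the
    // scalar GM request FIFO, while the outstanding-request counts stay
    // elevated until the access completes; validateRequestCounters()
    // sanity-checks that bookkeeping.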
4479
4480 void
4481 Inst_SMEM__S_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
4482 {
4483 initMemRead<1>(gpuDynInst);
4484 } // initiateAcc
4485
4486 void
4487 Inst_SMEM__S_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
4488 {
4489 ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
4490 sdst.write();
4491 } // completeAcc
4492
4493 Inst_SMEM__S_LOAD_DWORDX2::Inst_SMEM__S_LOAD_DWORDX2(InFmt_SMEM *iFmt)
4494 : Inst_SMEM(iFmt, "s_load_dwordx2")
4495 {
4496 setFlag(MemoryRef);
4497 setFlag(Load);
4498 } // Inst_SMEM__S_LOAD_DWORDX2
4499
4500 Inst_SMEM__S_LOAD_DWORDX2::~Inst_SMEM__S_LOAD_DWORDX2()
4501 {
4502 } // ~Inst_SMEM__S_LOAD_DWORDX2
4503
4504 /**
4505 * Read 2 dwords from scalar data cache. See s_load_dword for details on
4506 * the offset input.
4507 */
4508 void
4509 Inst_SMEM__S_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
4510 {
4511 Wavefront *wf = gpuDynInst->wavefront();
4512 gpuDynInst->execUnitId = wf->execUnitId;
4513 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4514 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4515 ScalarRegU32 offset(0);
4516 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4517
4518 addr.read();
4519
4520 if (instData.IMM) {
4521 offset = extData.OFFSET;
4522 } else {
4523 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4524 off_sgpr.read();
4525 offset = off_sgpr.rawData();
4526 }
4527
4528 calcAddr(gpuDynInst, addr, offset);
4529
4530 gpuDynInst->computeUnit()->scalarMemoryPipe.
4531 getGMReqFIFO().push(gpuDynInst);
4532
4533 wf->scalarRdGmReqsInPipe--;
4534 wf->scalarOutstandingReqsRdGm++;
4535 gpuDynInst->wavefront()->outstandingReqs++;
4536 gpuDynInst->wavefront()->validateRequestCounters();
4537 }
4538
4539 void
4540 Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
4541 {
4542 initMemRead<2>(gpuDynInst);
4543 } // initiateAcc
4544
4545 void
4546 Inst_SMEM__S_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
4547 {
4548 ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
4549 sdst.write();
4550 } // completeAcc
4551
4552 Inst_SMEM__S_LOAD_DWORDX4::Inst_SMEM__S_LOAD_DWORDX4(InFmt_SMEM *iFmt)
4553 : Inst_SMEM(iFmt, "s_load_dwordx4")
4554 {
4555 setFlag(MemoryRef);
4556 setFlag(Load);
4557 } // Inst_SMEM__S_LOAD_DWORDX4
4558
4559 Inst_SMEM__S_LOAD_DWORDX4::~Inst_SMEM__S_LOAD_DWORDX4()
4560 {
4561 } // ~Inst_SMEM__S_LOAD_DWORDX4
4562
4563 // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
4564 // the offset input.
4565 void
4566 Inst_SMEM__S_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
4567 {
4568 Wavefront *wf = gpuDynInst->wavefront();
4569 gpuDynInst->execUnitId = wf->execUnitId;
4570 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4571 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4572 ScalarRegU32 offset(0);
4573 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4574
4575 addr.read();
4576
4577 if (instData.IMM) {
4578 offset = extData.OFFSET;
4579 } else {
4580 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4581 off_sgpr.read();
4582 offset = off_sgpr.rawData();
4583 }
4584
4585 calcAddr(gpuDynInst, addr, offset);
4586
4587 gpuDynInst->computeUnit()->scalarMemoryPipe.
4588 getGMReqFIFO().push(gpuDynInst);
4589
4590 wf->scalarRdGmReqsInPipe--;
4591 wf->scalarOutstandingReqsRdGm++;
4592 gpuDynInst->wavefront()->outstandingReqs++;
4593 gpuDynInst->wavefront()->validateRequestCounters();
4594 }
4595
4596 void
4597 Inst_SMEM__S_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
4598 {
4599 initMemRead<4>(gpuDynInst);
4600 } // initiateAcc
4601
4602 void
4603 Inst_SMEM__S_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
4604 {
4605 ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
4606 sdst.write();
4607 } // completeAcc
4608
4609 Inst_SMEM__S_LOAD_DWORDX8::Inst_SMEM__S_LOAD_DWORDX8(InFmt_SMEM *iFmt)
4610 : Inst_SMEM(iFmt, "s_load_dwordx8")
4611 {
4612 setFlag(MemoryRef);
4613 setFlag(Load);
4614 } // Inst_SMEM__S_LOAD_DWORDX8
4615
4616 Inst_SMEM__S_LOAD_DWORDX8::~Inst_SMEM__S_LOAD_DWORDX8()
4617 {
4618 } // ~Inst_SMEM__S_LOAD_DWORDX8
4619
4620 // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
4621 // the offset input.
4622 void
4623 Inst_SMEM__S_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
4624 {
4625 Wavefront *wf = gpuDynInst->wavefront();
4626 gpuDynInst->execUnitId = wf->execUnitId;
4627 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4628 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4629 ScalarRegU32 offset(0);
4630 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4631
4632 addr.read();
4633
4634 if (instData.IMM) {
4635 offset = extData.OFFSET;
4636 } else {
4637 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4638 off_sgpr.read();
4639 offset = off_sgpr.rawData();
4640 }
4641
4642 calcAddr(gpuDynInst, addr, offset);
4643
4644 gpuDynInst->computeUnit()->scalarMemoryPipe.
4645 getGMReqFIFO().push(gpuDynInst);
4646
4647 wf->scalarRdGmReqsInPipe--;
4648 wf->scalarOutstandingReqsRdGm++;
4649 gpuDynInst->wavefront()->outstandingReqs++;
4650 gpuDynInst->wavefront()->validateRequestCounters();
4651 }
4652
4653 void
4654 Inst_SMEM__S_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
4655 {
4656 initMemRead<8>(gpuDynInst);
4657 } // initiateAcc
4658
4659 void
4660 Inst_SMEM__S_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
4661 {
4662 ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
4663 sdst.write();
4664 } // completeAcc
4665
4666 Inst_SMEM__S_LOAD_DWORDX16::Inst_SMEM__S_LOAD_DWORDX16(InFmt_SMEM *iFmt)
4667 : Inst_SMEM(iFmt, "s_load_dwordx16")
4668 {
4669 setFlag(MemoryRef);
4670 setFlag(Load);
4671 } // Inst_SMEM__S_LOAD_DWORDX16
4672
4673 Inst_SMEM__S_LOAD_DWORDX16::~Inst_SMEM__S_LOAD_DWORDX16()
4674 {
4675 } // ~Inst_SMEM__S_LOAD_DWORDX16
4676
4677 // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
4678 // the offset input.
4679 void
4680 Inst_SMEM__S_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
4681 {
4682 Wavefront *wf = gpuDynInst->wavefront();
4683 gpuDynInst->execUnitId = wf->execUnitId;
4684 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4685 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4686 ScalarRegU32 offset(0);
4687 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
4688
4689 addr.read();
4690
4691 if (instData.IMM) {
4692 offset = extData.OFFSET;
4693 } else {
4694 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4695 off_sgpr.read();
4696 offset = off_sgpr.rawData();
4697 }
4698
4699 calcAddr(gpuDynInst, addr, offset);
4700
4701 gpuDynInst->computeUnit()->scalarMemoryPipe.
4702 getGMReqFIFO().push(gpuDynInst);
4703
4704 wf->scalarRdGmReqsInPipe--;
4705 wf->scalarOutstandingReqsRdGm++;
4706 gpuDynInst->wavefront()->outstandingReqs++;
4707 gpuDynInst->wavefront()->validateRequestCounters();
4708 }
4709
4710 void
4711 Inst_SMEM__S_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
4712 {
4713 initMemRead<16>(gpuDynInst);
4714 } // initiateAcc
4715
4716 void
4717 Inst_SMEM__S_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
4718 {
4719 ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
4720 sdst.write();
4721 } // completeAcc
4722
4723 Inst_SMEM__S_BUFFER_LOAD_DWORD::Inst_SMEM__S_BUFFER_LOAD_DWORD(
4724 InFmt_SMEM *iFmt)
4725 : Inst_SMEM(iFmt, "s_buffer_load_dword")
4726 {
4727 setFlag(MemoryRef);
4728 setFlag(Load);
4729 } // Inst_SMEM__S_BUFFER_LOAD_DWORD
4730
4731 Inst_SMEM__S_BUFFER_LOAD_DWORD::~Inst_SMEM__S_BUFFER_LOAD_DWORD()
4732 {
4733 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORD
4734
4735 // Read 1 dword from scalar data cache. See S_LOAD_DWORD for details on the
4736 // offset input.
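    // Note the contrast with s_load_dword: for the buffer forms SBASE names
    // an aligned group of 4 SGPRs holding a 128-bit buffer resource
    // descriptor (read below as a U128 operand, with no "<< 1" scaling)
    // rather than a bare 64-bit address.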
4737 void
4738 Inst_SMEM__S_BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
4739 {
4740 Wavefront *wf = gpuDynInst->wavefront();
4741 gpuDynInst->execUnitId = wf->execUnitId;
4742 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4743 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4744 ScalarRegU32 offset(0);
4745 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4746
4747 rsrcDesc.read();
4748
4749 if (instData.IMM) {
4750 offset = extData.OFFSET;
4751 } else {
4752 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4753 off_sgpr.read();
4754 offset = off_sgpr.rawData();
4755 }
4756
4757 calcAddr(gpuDynInst, rsrcDesc, offset);
4758
4759 gpuDynInst->computeUnit()->scalarMemoryPipe
4760 .getGMReqFIFO().push(gpuDynInst);
4761
4762 wf->scalarRdGmReqsInPipe--;
4763 wf->scalarOutstandingReqsRdGm++;
4764 gpuDynInst->wavefront()->outstandingReqs++;
4765 gpuDynInst->wavefront()->validateRequestCounters();
4766 } // execute
4767
4768 void
4769 Inst_SMEM__S_BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
4770 {
4771 initMemRead<1>(gpuDynInst);
4772 } // initiateAcc
4773
4774 void
4775 Inst_SMEM__S_BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
4776 {
4777 // 1 request, size 32
4778 ScalarOperandU32 sdst(gpuDynInst, instData.SDATA);
4779 sdst.write();
4780 } // completeAcc
4781
4782 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::Inst_SMEM__S_BUFFER_LOAD_DWORDX2(
4783 InFmt_SMEM *iFmt)
4784 : Inst_SMEM(iFmt, "s_buffer_load_dwordx2")
4785 {
4786 setFlag(MemoryRef);
4787 setFlag(Load);
4788 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX2
4789
4790 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::~Inst_SMEM__S_BUFFER_LOAD_DWORDX2()
4791 {
4792 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX2
4793
4794 // Read 2 dwords from scalar data cache. See S_LOAD_DWORD for details on
4795 // the offset input.
4796 void
4797 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
4798 {
4799 Wavefront *wf = gpuDynInst->wavefront();
4800 gpuDynInst->execUnitId = wf->execUnitId;
4801 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4802 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4803 ScalarRegU32 offset(0);
4804 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4805
4806 rsrcDesc.read();
4807
4808 if (instData.IMM) {
4809 offset = extData.OFFSET;
4810 } else {
4811 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4812 off_sgpr.read();
4813 offset = off_sgpr.rawData();
4814 }
4815
4816 calcAddr(gpuDynInst, rsrcDesc, offset);
4817
4818 gpuDynInst->computeUnit()->scalarMemoryPipe
4819 .getGMReqFIFO().push(gpuDynInst);
4820
4821 wf->scalarRdGmReqsInPipe--;
4822 wf->scalarOutstandingReqsRdGm++;
4823 gpuDynInst->wavefront()->outstandingReqs++;
4824 gpuDynInst->wavefront()->validateRequestCounters();
4825 } // execute
4826
4827 void
4828 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
4829 {
4830 initMemRead<2>(gpuDynInst);
4831 } // initiateAcc
4832
4833 void
4834 Inst_SMEM__S_BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
4835 {
4836 // use U64 because 2 requests, each size 32
4837 ScalarOperandU64 sdst(gpuDynInst, instData.SDATA);
4838 sdst.write();
4839 } // completeAcc
4840
4841 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::Inst_SMEM__S_BUFFER_LOAD_DWORDX4(
4842 InFmt_SMEM *iFmt)
4843 : Inst_SMEM(iFmt, "s_buffer_load_dwordx4")
4844 {
4845 setFlag(MemoryRef);
4846 setFlag(Load);
4847 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX4
4848
4849 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::~Inst_SMEM__S_BUFFER_LOAD_DWORDX4()
4850 {
4851 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX4
4852
4853 // Read 4 dwords from scalar data cache. See S_LOAD_DWORD for details on
4854 // the offset input.
4855 void
4856 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
4857 {
4858 Wavefront *wf = gpuDynInst->wavefront();
4859 gpuDynInst->execUnitId = wf->execUnitId;
4860 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4861 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4862 ScalarRegU32 offset(0);
4863 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4864
4865 rsrcDesc.read();
4866
4867 if (instData.IMM) {
4868 offset = extData.OFFSET;
4869 } else {
4870 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4871 off_sgpr.read();
4872 offset = off_sgpr.rawData();
4873 }
4874
4875 calcAddr(gpuDynInst, rsrcDesc, offset);
4876
4877 gpuDynInst->computeUnit()->scalarMemoryPipe
4878 .getGMReqFIFO().push(gpuDynInst);
4879
4880 wf->scalarRdGmReqsInPipe--;
4881 wf->scalarOutstandingReqsRdGm++;
4882 gpuDynInst->wavefront()->outstandingReqs++;
4883 gpuDynInst->wavefront()->validateRequestCounters();
4884 } // execute
4885
4886 void
4887 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
4888 {
4889 initMemRead<4>(gpuDynInst);
4890 } // initiateAcc
4891
4892 void
4893 Inst_SMEM__S_BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
4894 {
4895 // 4 requests, each size 32
4896 ScalarOperandU128 sdst(gpuDynInst, instData.SDATA);
4897 sdst.write();
4898 } // completeAcc
4899
4900 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::Inst_SMEM__S_BUFFER_LOAD_DWORDX8(
4901 InFmt_SMEM *iFmt)
4902 : Inst_SMEM(iFmt, "s_buffer_load_dwordx8")
4903 {
4904 setFlag(MemoryRef);
4905 setFlag(Load);
4906 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX8
4907
4908 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::~Inst_SMEM__S_BUFFER_LOAD_DWORDX8()
4909 {
4910 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX8
4911
4912 // Read 8 dwords from scalar data cache. See S_LOAD_DWORD for details on
4913 // the offset input.
4914 void
4915 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::execute(GPUDynInstPtr gpuDynInst)
4916 {
4917 Wavefront *wf = gpuDynInst->wavefront();
4918 gpuDynInst->execUnitId = wf->execUnitId;
4919 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4920 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4921 ScalarRegU32 offset(0);
4922 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4923
4924 rsrcDesc.read();
4925
4926 if (instData.IMM) {
4927 offset = extData.OFFSET;
4928 } else {
4929 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4930 off_sgpr.read();
4931 offset = off_sgpr.rawData();
4932 }
4933
4934 calcAddr(gpuDynInst, rsrcDesc, offset);
4935
4936 gpuDynInst->computeUnit()->scalarMemoryPipe
4937 .getGMReqFIFO().push(gpuDynInst);
4938
4939 wf->scalarRdGmReqsInPipe--;
4940 wf->scalarOutstandingReqsRdGm++;
4941 gpuDynInst->wavefront()->outstandingReqs++;
4942 gpuDynInst->wavefront()->validateRequestCounters();
4943 } // execute
4944
4945 void
4946 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::initiateAcc(GPUDynInstPtr gpuDynInst)
4947 {
4948 initMemRead<8>(gpuDynInst);
4949 } // initiateAcc
4950
4951 void
4952 Inst_SMEM__S_BUFFER_LOAD_DWORDX8::completeAcc(GPUDynInstPtr gpuDynInst)
4953 {
4954 // 8 requests, each size 32
4955 ScalarOperandU256 sdst(gpuDynInst, instData.SDATA);
4956 sdst.write();
4957 } // completeAcc
4958
4959 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::Inst_SMEM__S_BUFFER_LOAD_DWORDX16(
4960 InFmt_SMEM *iFmt)
4961 : Inst_SMEM(iFmt, "s_buffer_load_dwordx16")
4962 {
4963 setFlag(MemoryRef);
4964 setFlag(Load);
4965 } // Inst_SMEM__S_BUFFER_LOAD_DWORDX16
4966
4967 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::~Inst_SMEM__S_BUFFER_LOAD_DWORDX16()
4968 {
4969 } // ~Inst_SMEM__S_BUFFER_LOAD_DWORDX16
4970
4971 // Read 16 dwords from scalar data cache. See S_LOAD_DWORD for details on
4972 // the offset input.
4973 void
4974 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::execute(GPUDynInstPtr gpuDynInst)
4975 {
4976 Wavefront *wf = gpuDynInst->wavefront();
4977 gpuDynInst->execUnitId = wf->execUnitId;
4978 gpuDynInst->latency.init(gpuDynInst->computeUnit());
4979 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
4980 ScalarRegU32 offset(0);
4981 ConstScalarOperandU128 rsrcDesc(gpuDynInst, instData.SBASE);
4982
4983 rsrcDesc.read();
4984
4985 if (instData.IMM) {
4986 offset = extData.OFFSET;
4987 } else {
4988 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
4989 off_sgpr.read();
4990 offset = off_sgpr.rawData();
4991 }
4992
4993 calcAddr(gpuDynInst, rsrcDesc, offset);
4994
4995 gpuDynInst->computeUnit()->scalarMemoryPipe
4996 .getGMReqFIFO().push(gpuDynInst);
4997
4998 wf->scalarRdGmReqsInPipe--;
4999 wf->scalarOutstandingReqsRdGm++;
5000 gpuDynInst->wavefront()->outstandingReqs++;
5001 gpuDynInst->wavefront()->validateRequestCounters();
5002 } // execute
5003
5004 void
5005 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::initiateAcc(GPUDynInstPtr gpuDynInst)
5006 {
5007 initMemRead<16>(gpuDynInst);
5008 } // initiateAcc
5009
5010 void
5011 Inst_SMEM__S_BUFFER_LOAD_DWORDX16::completeAcc(GPUDynInstPtr gpuDynInst)
5012 {
5013 // 16 requests, each size 32
5014 ScalarOperandU512 sdst(gpuDynInst, instData.SDATA);
5015 sdst.write();
5016 } // completeAcc
5017
5018 Inst_SMEM__S_STORE_DWORD::Inst_SMEM__S_STORE_DWORD(InFmt_SMEM *iFmt)
5019 : Inst_SMEM(iFmt, "s_store_dword")
5020 {
5021 setFlag(MemoryRef);
5022 setFlag(Store);
5023 } // Inst_SMEM__S_STORE_DWORD
5024
5025 Inst_SMEM__S_STORE_DWORD::~Inst_SMEM__S_STORE_DWORD()
5026 {
5027 } // ~Inst_SMEM__S_STORE_DWORD
5028
5029 // Write 1 dword to scalar data cache.
5030 // If the offset is specified as an SGPR, the SGPR contains an unsigned
5031 // BYTE offset (the 2 LSBs are ignored).
5032 // If the offset is specified as an immediate 20-bit constant, the
5033 // constant is an unsigned BYTE offset.
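    // An illustrative encoding (hypothetical assembly): s_store_dword s4,
    // s[0:1], 0x20 writes the dword in s4 to base + 0x20; initiateAcc()
    // below stages that dword in gpuDynInst->scalar_data before the write
    // is issued.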
5034 void
5035 Inst_SMEM__S_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
5036 {
5037 Wavefront *wf = gpuDynInst->wavefront();
5038 gpuDynInst->execUnitId = wf->execUnitId;
5039 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5040 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5041 ScalarRegU32 offset(0);
5042 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
5043
5044 addr.read();
5045
5046 if (instData.IMM) {
5047 offset = extData.OFFSET;
5048 } else {
5049 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
5050 off_sgpr.read();
5051 offset = off_sgpr.rawData();
5052 }
5053
5054 calcAddr(gpuDynInst, addr, offset);
5055
5056 gpuDynInst->computeUnit()->scalarMemoryPipe.
5057 getGMReqFIFO().push(gpuDynInst);
5058
5059 wf->scalarWrGmReqsInPipe--;
5060 wf->scalarOutstandingReqsWrGm++;
5061 gpuDynInst->wavefront()->outstandingReqs++;
5062 gpuDynInst->wavefront()->validateRequestCounters();
5063 }
5064
5065 void
5066 Inst_SMEM__S_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
5067 {
5068 ConstScalarOperandU32 sdata(gpuDynInst, instData.SDATA);
5069 sdata.read();
5070 std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
5071 sizeof(ScalarRegU32));
5072 initMemWrite<1>(gpuDynInst);
5073 } // initiateAcc
5074
5075 void
5076 Inst_SMEM__S_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
5077 {
5078 } // completeAcc
5079
5080 Inst_SMEM__S_STORE_DWORDX2::Inst_SMEM__S_STORE_DWORDX2(InFmt_SMEM *iFmt)
5081 : Inst_SMEM(iFmt, "s_store_dwordx2")
5082 {
5083 setFlag(MemoryRef);
5084 setFlag(Store);
5085 } // Inst_SMEM__S_STORE_DWORDX2
5086
5087 Inst_SMEM__S_STORE_DWORDX2::~Inst_SMEM__S_STORE_DWORDX2()
5088 {
5089 } // ~Inst_SMEM__S_STORE_DWORDX2
5090
5091 // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
5092 // the offset input.
5093 void
5094 Inst_SMEM__S_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
5095 {
5096 Wavefront *wf = gpuDynInst->wavefront();
5097 gpuDynInst->execUnitId = wf->execUnitId;
5098 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5099 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5100 ScalarRegU32 offset(0);
5101 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
5102
5103 addr.read();
5104
5105 if (instData.IMM) {
5106 offset = extData.OFFSET;
5107 } else {
5108 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
5109 off_sgpr.read();
5110 offset = off_sgpr.rawData();
5111 }
5112
5113 calcAddr(gpuDynInst, addr, offset);
5114
5115 gpuDynInst->computeUnit()->scalarMemoryPipe.
5116 getGMReqFIFO().push(gpuDynInst);
5117
5118 wf->scalarWrGmReqsInPipe--;
5119 wf->scalarOutstandingReqsWrGm++;
5120 gpuDynInst->wavefront()->outstandingReqs++;
5121 gpuDynInst->wavefront()->validateRequestCounters();
5122 }
5123
5124 void
5125 Inst_SMEM__S_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
5126 {
5127 ConstScalarOperandU64 sdata(gpuDynInst, instData.SDATA);
5128 sdata.read();
5129 std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
5130 sizeof(ScalarRegU64));
5131 initMemWrite<2>(gpuDynInst);
5132 } // initiateAcc
5133
5134 void
5135 Inst_SMEM__S_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
5136 {
5137 } // completeAcc
5138
5139 Inst_SMEM__S_STORE_DWORDX4::Inst_SMEM__S_STORE_DWORDX4(InFmt_SMEM *iFmt)
5140 : Inst_SMEM(iFmt, "s_store_dwordx4")
5141 {
5142 setFlag(MemoryRef);
5143 setFlag(Store);
5144 } // Inst_SMEM__S_STORE_DWORDX4
5145
5146 Inst_SMEM__S_STORE_DWORDX4::~Inst_SMEM__S_STORE_DWORDX4()
5147 {
5148 } // ~Inst_SMEM__S_STORE_DWORDX4
5149
5150 // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
5151 // the offset input.
5152 void
5153 Inst_SMEM__S_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
5154 {
5155 Wavefront *wf = gpuDynInst->wavefront();
5156 gpuDynInst->execUnitId = wf->execUnitId;
5157 gpuDynInst->latency.init(gpuDynInst->computeUnit());
5158 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
5159 ScalarRegU32 offset(0);
5160 ConstScalarOperandU64 addr(gpuDynInst, instData.SBASE << 1);
5161
5162 addr.read();
5163
5164 if (instData.IMM) {
5165 offset = extData.OFFSET;
5166 } else {
5167 ConstScalarOperandU32 off_sgpr(gpuDynInst, extData.OFFSET);
5168 off_sgpr.read();
5169 offset = off_sgpr.rawData();
5170 }
5171
5172 calcAddr(gpuDynInst, addr, offset);
5173
5174 gpuDynInst->computeUnit()->scalarMemoryPipe.
5175 getGMReqFIFO().push(gpuDynInst);
5176
5177 wf->scalarWrGmReqsInPipe--;
5178 wf->scalarOutstandingReqsWrGm++;
5179 gpuDynInst->wavefront()->outstandingReqs++;
5180 gpuDynInst->wavefront()->validateRequestCounters();
5181 }
5182
5183 void
5184 Inst_SMEM__S_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
5185 {
5186 ConstScalarOperandU128 sdata(gpuDynInst, instData.SDATA);
5187 sdata.read();
5188 std::memcpy((void*)gpuDynInst->scalar_data, sdata.rawDataPtr(),
5189 4 * sizeof(ScalarRegU32));
5190 initMemWrite<4>(gpuDynInst);
5191 } // initiateAcc
5192
5193 void
5194 Inst_SMEM__S_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
5195 {
5196 } // completeAcc
5197
5198 Inst_SMEM__S_BUFFER_STORE_DWORD::Inst_SMEM__S_BUFFER_STORE_DWORD(
5199 InFmt_SMEM *iFmt)
5200 : Inst_SMEM(iFmt, "s_buffer_store_dword")
5201 {
5202 setFlag(MemoryRef);
5203 setFlag(Store);
5204 } // Inst_SMEM__S_BUFFER_STORE_DWORD
5205
5206 Inst_SMEM__S_BUFFER_STORE_DWORD::~Inst_SMEM__S_BUFFER_STORE_DWORD()
5207 {
5208 } // ~Inst_SMEM__S_BUFFER_STORE_DWORD
5209
5210 // Write 1 dword to scalar data cache. See S_STORE_DWORD for details on the
5211 // offset input.
5212 void
5213 Inst_SMEM__S_BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
5214 {
5215 panicUnimplemented();
5216 }
5217
5218 void
5219 Inst_SMEM__S_BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
5220 {
5221 } // initiateAcc
5222
5223 void
5224 Inst_SMEM__S_BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
5225 {
5226 } // completeAcc
5227
5228 Inst_SMEM__S_BUFFER_STORE_DWORDX2::Inst_SMEM__S_BUFFER_STORE_DWORDX2(
5229 InFmt_SMEM *iFmt)
5230 : Inst_SMEM(iFmt, "s_buffer_store_dwordx2")
5231 {
5232 setFlag(MemoryRef);
5233 setFlag(Store);
5234 } // Inst_SMEM__S_BUFFER_STORE_DWORDX2
5235
5236 Inst_SMEM__S_BUFFER_STORE_DWORDX2::~Inst_SMEM__S_BUFFER_STORE_DWORDX2()
5237 {
5238 } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX2
5239
5240 // Write 2 dwords to scalar data cache. See S_STORE_DWORD for details on
5241 // the offset input.
5242 void
5243 Inst_SMEM__S_BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
5244 {
5245 panicUnimplemented();
5246 }
5247
5248 void
5249 Inst_SMEM__S_BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
5250 {
5251 } // initiateAcc
5252
5253 void
5254 Inst_SMEM__S_BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
5255 {
5256 } // completeAcc
5257
5258 Inst_SMEM__S_BUFFER_STORE_DWORDX4::Inst_SMEM__S_BUFFER_STORE_DWORDX4(
5259 InFmt_SMEM *iFmt)
5260 : Inst_SMEM(iFmt, "s_buffer_store_dwordx4")
5261 {
5262 setFlag(MemoryRef);
5263 setFlag(Store);
5264 } // Inst_SMEM__S_BUFFER_STORE_DWORDX4
5265
5266 Inst_SMEM__S_BUFFER_STORE_DWORDX4::~Inst_SMEM__S_BUFFER_STORE_DWORDX4()
5267 {
5268 } // ~Inst_SMEM__S_BUFFER_STORE_DWORDX4
5269
5270 // Write 4 dwords to scalar data cache. See S_STORE_DWORD for details on
5271 // the offset input.
5272 void
5273 Inst_SMEM__S_BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
5274 {
5275 panicUnimplemented();
5276 }
5277
5278 void
5279 Inst_SMEM__S_BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
5280 {
5281 } // initiateAcc
5282
5283 void
5284 Inst_SMEM__S_BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
5285 {
5286 } // completeAcc
5287
5288 Inst_SMEM__S_DCACHE_INV::Inst_SMEM__S_DCACHE_INV(InFmt_SMEM *iFmt)
5289 : Inst_SMEM(iFmt, "s_dcache_inv")
5290 {
5291 } // Inst_SMEM__S_DCACHE_INV
5292
5293 Inst_SMEM__S_DCACHE_INV::~Inst_SMEM__S_DCACHE_INV()
5294 {
5295 } // ~Inst_SMEM__S_DCACHE_INV
5296
5297 // Invalidate the scalar data cache.
5298 void
5299 Inst_SMEM__S_DCACHE_INV::execute(GPUDynInstPtr gpuDynInst)
5300 {
5301 panicUnimplemented();
5302 }
5303
5304 Inst_SMEM__S_DCACHE_WB::Inst_SMEM__S_DCACHE_WB(InFmt_SMEM *iFmt)
5305 : Inst_SMEM(iFmt, "s_dcache_wb")
5306 {
5307 } // Inst_SMEM__S_DCACHE_WB
5308
5309 Inst_SMEM__S_DCACHE_WB::~Inst_SMEM__S_DCACHE_WB()
5310 {
5311 } // ~Inst_SMEM__S_DCACHE_WB
5312
5313 // Write back dirty data in the scalar data cache.
5314 void
5315 Inst_SMEM__S_DCACHE_WB::execute(GPUDynInstPtr gpuDynInst)
5316 {
5317 panicUnimplemented();
5318 }
5319
5320 Inst_SMEM__S_DCACHE_INV_VOL::Inst_SMEM__S_DCACHE_INV_VOL(InFmt_SMEM *iFmt)
5321 : Inst_SMEM(iFmt, "s_dcache_inv_vol")
5322 {
5323 } // Inst_SMEM__S_DCACHE_INV_VOL
5324
5325 Inst_SMEM__S_DCACHE_INV_VOL::~Inst_SMEM__S_DCACHE_INV_VOL()
5326 {
5327 } // ~Inst_SMEM__S_DCACHE_INV_VOL
5328
5329 // Invalidate the scalar data cache volatile lines.
5330 void
5331 Inst_SMEM__S_DCACHE_INV_VOL::execute(GPUDynInstPtr gpuDynInst)
5332 {
5333 panicUnimplemented();
5334 }
5335
5336 Inst_SMEM__S_DCACHE_WB_VOL::Inst_SMEM__S_DCACHE_WB_VOL(InFmt_SMEM *iFmt)
5337 : Inst_SMEM(iFmt, "s_dcache_wb_vol")
5338 {
5339 } // Inst_SMEM__S_DCACHE_WB_VOL
5340
5341 Inst_SMEM__S_DCACHE_WB_VOL::~Inst_SMEM__S_DCACHE_WB_VOL()
5342 {
5343 } // ~Inst_SMEM__S_DCACHE_WB_VOL
5344
5345 // Write back dirty data in the scalar data cache volatile lines.
5346 void
5347 Inst_SMEM__S_DCACHE_WB_VOL::execute(GPUDynInstPtr gpuDynInst)
5348 {
5349 panicUnimplemented();
5350 }
5351
5352 Inst_SMEM__S_MEMTIME::Inst_SMEM__S_MEMTIME(InFmt_SMEM *iFmt)
5353 : Inst_SMEM(iFmt, "s_memtime")
5354 {
5355 } // Inst_SMEM__S_MEMTIME
5356
5357 Inst_SMEM__S_MEMTIME::~Inst_SMEM__S_MEMTIME()
5358 {
5359 } // ~Inst_SMEM__S_MEMTIME
5360
5361 // Return current 64-bit timestamp.
5362 void
5363 Inst_SMEM__S_MEMTIME::execute(GPUDynInstPtr gpuDynInst)
5364 {
5365 panicUnimplemented();
5366 }
5367
5368 Inst_SMEM__S_MEMREALTIME::Inst_SMEM__S_MEMREALTIME(InFmt_SMEM *iFmt)
5369 : Inst_SMEM(iFmt, "s_memrealtime")
5370 {
5371 } // Inst_SMEM__S_MEMREALTIME
5372
5373 Inst_SMEM__S_MEMREALTIME::~Inst_SMEM__S_MEMREALTIME()
5374 {
5375 } // ~Inst_SMEM__S_MEMREALTIME
5376
5377 // Return current 64-bit RTC.
5378 void
5379 Inst_SMEM__S_MEMREALTIME::execute(GPUDynInstPtr gpuDynInst)
5380 {
5381 panicUnimplemented();
5382 }
5383
5384 Inst_SMEM__S_ATC_PROBE::Inst_SMEM__S_ATC_PROBE(InFmt_SMEM *iFmt)
5385 : Inst_SMEM(iFmt, "s_atc_probe")
5386 {
5387 } // Inst_SMEM__S_ATC_PROBE
5388
5389 Inst_SMEM__S_ATC_PROBE::~Inst_SMEM__S_ATC_PROBE()
5390 {
5391 } // ~Inst_SMEM__S_ATC_PROBE
5392
5393 void
5394 Inst_SMEM__S_ATC_PROBE::execute(GPUDynInstPtr gpuDynInst)
5395 {
5396 panicUnimplemented();
5397 }
5398
5399 Inst_SMEM__S_ATC_PROBE_BUFFER::Inst_SMEM__S_ATC_PROBE_BUFFER(
5400 InFmt_SMEM *iFmt)
5401 : Inst_SMEM(iFmt, "s_atc_probe_buffer")
5402 {
5403 } // Inst_SMEM__S_ATC_PROBE_BUFFER
5404
5405 Inst_SMEM__S_ATC_PROBE_BUFFER::~Inst_SMEM__S_ATC_PROBE_BUFFER()
5406 {
5407 } // ~Inst_SMEM__S_ATC_PROBE_BUFFER
5408
5409 void
5410 Inst_SMEM__S_ATC_PROBE_BUFFER::execute(GPUDynInstPtr gpuDynInst)
5411 {
5412 panicUnimplemented();
5413 }
5414
5415 Inst_VOP2__V_CNDMASK_B32::Inst_VOP2__V_CNDMASK_B32(InFmt_VOP2 *iFmt)
5416 : Inst_VOP2(iFmt, "v_cndmask_b32")
5417 {
5418 setFlag(ALU);
5419 setFlag(ReadsVCC);
5420 } // Inst_VOP2__V_CNDMASK_B32
5421
5422 Inst_VOP2__V_CNDMASK_B32::~Inst_VOP2__V_CNDMASK_B32()
5423 {
5424 } // ~Inst_VOP2__V_CNDMASK_B32
5425
5426 // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
5427 // as a scalar GPR in S2.
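    // A worked example (illustrative values): with VCC = 0x5 only lanes 0
    // and 2 have their mask bit set, so those lanes receive S1 while every
    // other active lane receives S0 -- VCC acts as a per-lane select, not a
    // single boolean.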
5428 void
5429 Inst_VOP2__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
5430 {
5431 Wavefront *wf = gpuDynInst->wavefront();
5432 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
5433 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
5434 VecOperandU32 vdst(gpuDynInst, instData.VDST);
5435 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
5436
5437 src0.readSrc();
5438 src1.read();
5439 vcc.read();
5440
5441 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5442 if (wf->execMask(lane)) {
5443 vdst[lane]
5444 = bits(vcc.rawData(), lane) ? src1[lane] : src0[lane];
5445 }
5446 }
5447
5448 vdst.write();
5449 }
5450
5451 Inst_VOP2__V_ADD_F32::Inst_VOP2__V_ADD_F32(InFmt_VOP2 *iFmt)
5452 : Inst_VOP2(iFmt, "v_add_f32")
5453 {
5454 setFlag(ALU);
5455 setFlag(F32);
5456 } // Inst_VOP2__V_ADD_F32
5457
5458 Inst_VOP2__V_ADD_F32::~Inst_VOP2__V_ADD_F32()
5459 {
5460 } // ~Inst_VOP2__V_ADD_F32
5461
5462 // D.f = S0.f + S1.f.
5463 void
5464 Inst_VOP2__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
5465 {
5466 Wavefront *wf = gpuDynInst->wavefront();
5467 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5468 VecOperandF32 src1(gpuDynInst, instData.VSRC1);
5469 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5470
5471 src0.readSrc();
5472 src1.read();
5473
5474 if (isDPPInst()) {
5475 VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
5476 src0_dpp.read();
5477
5478 DPRINTF(GCN3, "Handling V_ADD_F32 SRC DPP. SRC0: register v[%d], "
5479                    "DPP_CTRL: %#x, SRC0_ABS: %d, SRC0_NEG: %d, "
5480 "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
5481 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
5482 extData.iFmt_VOP_DPP.DPP_CTRL,
5483 extData.iFmt_VOP_DPP.SRC0_ABS,
5484 extData.iFmt_VOP_DPP.SRC0_NEG,
5485 extData.iFmt_VOP_DPP.SRC1_ABS,
5486 extData.iFmt_VOP_DPP.SRC1_NEG,
5487 extData.iFmt_VOP_DPP.BOUND_CTRL,
5488 extData.iFmt_VOP_DPP.BANK_MASK,
5489 extData.iFmt_VOP_DPP.ROW_MASK);
5490
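            // processDPP() (inst_util.hh) applies the cross-lane selection
            // encoded in DPP_CTRL, along with the ABS/NEG and bound-control
            // modifiers, to the operands in place; the lane loop below then
            // consumes the permuted sources like an ordinary VOP2 add.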
5491 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
5492
5493 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5494 if (wf->execMask(lane)) {
5495 vdst[lane] = src0_dpp[lane] + src1[lane];
5496 }
5497 }
5498 } else {
5499 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5500 if (wf->execMask(lane)) {
5501 vdst[lane] = src0[lane] + src1[lane];
5502 }
5503 }
5504 }
5505
5506 vdst.write();
5507 }
5508
5509 Inst_VOP2__V_SUB_F32::Inst_VOP2__V_SUB_F32(InFmt_VOP2 *iFmt)
5510 : Inst_VOP2(iFmt, "v_sub_f32")
5511 {
5512 setFlag(ALU);
5513 setFlag(F32);
5514 } // Inst_VOP2__V_SUB_F32
5515
5516 Inst_VOP2__V_SUB_F32::~Inst_VOP2__V_SUB_F32()
5517 {
5518 } // ~Inst_VOP2__V_SUB_F32
5519
5520 // D.f = S0.f - S1.f.
5521 void
5522 Inst_VOP2__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
5523 {
5524 Wavefront *wf = gpuDynInst->wavefront();
5525 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5526 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5527 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5528
5529 src0.readSrc();
5530 src1.read();
5531
5532 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5533 if (wf->execMask(lane)) {
5534 vdst[lane] = src0[lane] - src1[lane];
5535 }
5536 }
5537
5538 vdst.write();
5539 }
5540
5541 Inst_VOP2__V_SUBREV_F32::Inst_VOP2__V_SUBREV_F32(InFmt_VOP2 *iFmt)
5542 : Inst_VOP2(iFmt, "v_subrev_f32")
5543 {
5544 setFlag(ALU);
5545 setFlag(F32);
5546 } // Inst_VOP2__V_SUBREV_F32
5547
5548 Inst_VOP2__V_SUBREV_F32::~Inst_VOP2__V_SUBREV_F32()
5549 {
5550 } // ~Inst_VOP2__V_SUBREV_F32
5551
5552 // D.f = S1.f - S0.f.
5553 void
5554 Inst_VOP2__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
5555 {
5556 Wavefront *wf = gpuDynInst->wavefront();
5557 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5558 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5559 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5560
5561 src0.readSrc();
5562 src1.read();
5563
5564 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5565 if (wf->execMask(lane)) {
5566 vdst[lane] = src1[lane] - src0[lane];
5567 }
5568 }
5569
5570 vdst.write();
5571 }
5572
5573 Inst_VOP2__V_MUL_LEGACY_F32::Inst_VOP2__V_MUL_LEGACY_F32(InFmt_VOP2 *iFmt)
5574 : Inst_VOP2(iFmt, "v_mul_legacy_f32")
5575 {
5576 setFlag(ALU);
5577 setFlag(F32);
5578 } // Inst_VOP2__V_MUL_LEGACY_F32
5579
5580 Inst_VOP2__V_MUL_LEGACY_F32::~Inst_VOP2__V_MUL_LEGACY_F32()
5581 {
5582 } // ~Inst_VOP2__V_MUL_LEGACY_F32
5583
5584 // D.f = S0.f * S1.f
5585 void
5586 Inst_VOP2__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
5587 {
5588 Wavefront *wf = gpuDynInst->wavefront();
5589 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5590 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5591 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5592
5593 src0.readSrc();
5594 src1.read();
5595
5596 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5597 if (wf->execMask(lane)) {
5598 vdst[lane] = src0[lane] * src1[lane];
5599 }
5600 }
5601
5602 vdst.write();
5603 }
5604
5605 Inst_VOP2__V_MUL_F32::Inst_VOP2__V_MUL_F32(InFmt_VOP2 *iFmt)
5606 : Inst_VOP2(iFmt, "v_mul_f32")
5607 {
5608 setFlag(ALU);
5609 setFlag(F32);
5610 } // Inst_VOP2__V_MUL_F32
5611
5612 Inst_VOP2__V_MUL_F32::~Inst_VOP2__V_MUL_F32()
5613 {
5614 } // ~Inst_VOP2__V_MUL_F32
5615
5616 // D.f = S0.f * S1.f.
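    // The loop below resolves the IEEE-754 edge cases by hand rather than
    // relying on host floating point: NaN inputs propagate, (+/-0) * (+/-inf)
    // produces NaN, a zero or subnormal (flushed) src0 times a finite src1
    // yields a correctly signed zero, and an infinite src0 times a finite
    // nonzero src1 yields a correctly signed infinity. E.g. (+0.0f) * (-5.0f)
    // = -0.0f and (-inf) * (-2.0f) = +inf (illustrative values).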
5617 void
5618 Inst_VOP2__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
5619 {
5620 Wavefront *wf = gpuDynInst->wavefront();
5621 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5622 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5623 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5624
5625 src0.readSrc();
5626 src1.read();
5627
5628 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5629 if (wf->execMask(lane)) {
5630 if (std::isnan(src0[lane]) ||
5631 std::isnan(src1[lane])) {
5632 vdst[lane] = NAN;
5633 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
5634 std::fpclassify(src0[lane]) == FP_ZERO) &&
5635 !std::signbit(src0[lane])) {
5636 if (std::isinf(src1[lane])) {
5637 vdst[lane] = NAN;
5638 } else if (!std::signbit(src1[lane])) {
5639 vdst[lane] = +0.0;
5640 } else {
5641 vdst[lane] = -0.0;
5642 }
5643 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
5644 std::fpclassify(src0[lane]) == FP_ZERO) &&
5645 std::signbit(src0[lane])) {
5646 if (std::isinf(src1[lane])) {
5647 vdst[lane] = NAN;
5648 } else if (std::signbit(src1[lane])) {
5649 vdst[lane] = +0.0;
5650 } else {
5651 vdst[lane] = -0.0;
5652 }
5653 } else if (std::isinf(src0[lane]) &&
5654 !std::signbit(src0[lane])) {
5655 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
5656 std::fpclassify(src1[lane]) == FP_ZERO) {
5657 vdst[lane] = NAN;
5658 } else if (!std::signbit(src1[lane])) {
5659 vdst[lane] = +INFINITY;
5660 } else {
5661 vdst[lane] = -INFINITY;
5662 }
5663 } else if (std::isinf(src0[lane]) &&
5664 std::signbit(src0[lane])) {
5665 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
5666 std::fpclassify(src1[lane]) == FP_ZERO) {
5667 vdst[lane] = NAN;
5668 } else if (std::signbit(src1[lane])) {
5669 vdst[lane] = +INFINITY;
5670 } else {
5671 vdst[lane] = -INFINITY;
5672 }
5673 } else {
5674 vdst[lane] = src0[lane] * src1[lane];
5675 }
5676 }
5677 }
5678
5679 vdst.write();
5680 }
5681
5682 Inst_VOP2__V_MUL_I32_I24::Inst_VOP2__V_MUL_I32_I24(InFmt_VOP2 *iFmt)
5683 : Inst_VOP2(iFmt, "v_mul_i32_i24")
5684 {
5685 setFlag(ALU);
5686 } // Inst_VOP2__V_MUL_I32_I24
5687
5688 Inst_VOP2__V_MUL_I32_I24::~Inst_VOP2__V_MUL_I32_I24()
5689 {
5690 } // ~Inst_VOP2__V_MUL_I32_I24
5691
5692 // D.i = S0.i[23:0] * S1.i[23:0].
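    // A worked example (illustrative values): the low 24 bits are
    // sign-extended before the multiply, so src0 = 0x00FFFFFF is treated as
    // -1 and multiplying by src1 = 2 yields -2 (0xFFFFFFFE), not 0x01FFFFFE.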
5693 void
5694 Inst_VOP2__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
5695 {
5696 Wavefront *wf = gpuDynInst->wavefront();
5697 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5698 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5699 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5700
5701 src0.readSrc();
5702 src1.read();
5703
5704 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5705 if (wf->execMask(lane)) {
5706 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
5707 * sext<24>(bits(src1[lane], 23, 0));
5708 }
5709 }
5710
5711 vdst.write();
5712 }
5713
5714 Inst_VOP2__V_MUL_HI_I32_I24::Inst_VOP2__V_MUL_HI_I32_I24(InFmt_VOP2 *iFmt)
5715 : Inst_VOP2(iFmt, "v_mul_hi_i32_i24")
5716 {
5717 setFlag(ALU);
5718 } // Inst_VOP2__V_MUL_HI_I32_I24
5719
5720 Inst_VOP2__V_MUL_HI_I32_I24::~Inst_VOP2__V_MUL_HI_I32_I24()
5721 {
5722 } // ~Inst_VOP2__V_MUL_HI_I32_I24
5723
5724 // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
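    // A worked example (illustrative values): the full product of two
    // signed 24-bit values fits in 48 bits, so D.i receives bits 47:32.
    // With both sources at the 24-bit maximum 0x7FFFFF (8388607) the product
    // is 0x3FFFFF000001 and vdst = 0x3FFF.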
5725 void
5726 Inst_VOP2__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
5727 {
5728 Wavefront *wf = gpuDynInst->wavefront();
5729 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5730 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5731 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5732
5733 src0.readSrc();
5734 src1.read();
5735
5736 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5737 if (wf->execMask(lane)) {
5738 VecElemI64 tmp_src0
5739 = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
5740 VecElemI64 tmp_src1
5741 = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
5742
5743 vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
5744 }
5745 }
5746
5747 vdst.write();
5748 }
5749
5750 Inst_VOP2__V_MUL_U32_U24::Inst_VOP2__V_MUL_U32_U24(InFmt_VOP2 *iFmt)
5751 : Inst_VOP2(iFmt, "v_mul_u32_u24")
5752 {
5753 setFlag(ALU);
5754 } // Inst_VOP2__V_MUL_U32_U24
5755
5756 Inst_VOP2__V_MUL_U32_U24::~Inst_VOP2__V_MUL_U32_U24()
5757 {
5758 } // ~Inst_VOP2__V_MUL_U32_U24
5759
5760 // D.u = S0.u[23:0] * S1.u[23:0].
5761 void
5762 Inst_VOP2__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
5763 {
5764 Wavefront *wf = gpuDynInst->wavefront();
5765 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
5766 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
5767 VecOperandU32 vdst(gpuDynInst, instData.VDST);
5768
5769 src0.readSrc();
5770 src1.read();
5771
5772 if (isSDWAInst()) {
5773 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
5774             // use copies of the original src0, src1, and dest during selection
5775 VecOperandU32 origSrc0_sdwa(gpuDynInst,
5776 extData.iFmt_VOP_SDWA.SRC0);
5777 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
5778 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
5779
5780 src0_sdwa.read();
5781 origSrc0_sdwa.read();
5782 origSrc1.read();
5783
5784 DPRINTF(GCN3, "Handling V_MUL_U32_U24 SRC SDWA. SRC0: register "
5785 "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
5786 "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
5787 "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
5788 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
5789 extData.iFmt_VOP_SDWA.DST_UNUSED,
5790 extData.iFmt_VOP_SDWA.CLAMP,
5791 extData.iFmt_VOP_SDWA.SRC0_SEL,
5792 extData.iFmt_VOP_SDWA.SRC0_SEXT,
5793 extData.iFmt_VOP_SDWA.SRC0_NEG,
5794 extData.iFmt_VOP_SDWA.SRC0_ABS,
5795 extData.iFmt_VOP_SDWA.SRC1_SEL,
5796 extData.iFmt_VOP_SDWA.SRC1_SEXT,
5797 extData.iFmt_VOP_SDWA.SRC1_NEG,
5798 extData.iFmt_VOP_SDWA.SRC1_ABS);
5799
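            // processSDWA_src() (inst_util.hh) applies the sub-dword
            // byte/word selects to the source operands, using the orig*
            // copies to preserve the unselected bits; processSDWA_dst()
            // below merges the result back into the destination according
            // to DST_SEL.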
5800 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
5801 src1, origSrc1);
5802
5803 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5804 if (wf->execMask(lane)) {
5805 vdst[lane] = bits(src0_sdwa[lane], 23, 0) *
5806 bits(src1[lane], 23, 0);
5807 origVdst[lane] = vdst[lane]; // keep copy consistent
5808 }
5809 }
5810
5811 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
5812 } else {
5813 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5814 if (wf->execMask(lane)) {
5815 vdst[lane] = bits(src0[lane], 23, 0) *
5816 bits(src1[lane], 23, 0);
5817 }
5818 }
5819 }
5820
5821
5822 vdst.write();
5823 }
5824
5825 Inst_VOP2__V_MUL_HI_U32_U24::Inst_VOP2__V_MUL_HI_U32_U24(InFmt_VOP2 *iFmt)
5826 : Inst_VOP2(iFmt, "v_mul_hi_u32_u24")
5827 {
5828 setFlag(ALU);
5829 } // Inst_VOP2__V_MUL_HI_U32_U24
5830
5831 Inst_VOP2__V_MUL_HI_U32_U24::~Inst_VOP2__V_MUL_HI_U32_U24()
5832 {
5833 } // ~Inst_VOP2__V_MUL_HI_U32_U24
5834
5835 // D.i = (S0.u[23:0] * S1.u[23:0]) >> 32.
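    // The unsigned counterpart of v_mul_hi_i32_i24 above (illustrative
    // values): with both sources at the 24-bit maximum 0xFFFFFF the product
    // is 0xFFFFFE000001, so vdst = 0xFFFF.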
5836 void
5837 Inst_VOP2__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
5838 {
5839 Wavefront *wf = gpuDynInst->wavefront();
5840 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
5841 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
5842 VecOperandU32 vdst(gpuDynInst, instData.VDST);
5843
5844 src0.readSrc();
5845 src1.read();
5846
5847 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5848 if (wf->execMask(lane)) {
5849 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
5850 VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
5851 vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
5852 }
5853 }
5854
5855 vdst.write();
5856 }
5857
5858 Inst_VOP2__V_MIN_F32::Inst_VOP2__V_MIN_F32(InFmt_VOP2 *iFmt)
5859 : Inst_VOP2(iFmt, "v_min_f32")
5860 {
5861 setFlag(ALU);
5862 setFlag(F32);
5863 } // Inst_VOP2__V_MIN_F32
5864
5865 Inst_VOP2__V_MIN_F32::~Inst_VOP2__V_MIN_F32()
5866 {
5867 } // ~Inst_VOP2__V_MIN_F32
5868
5869 // D.f = (S0.f < S1.f ? S0.f : S1.f).
5870 void
5871 Inst_VOP2__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
5872 {
5873 Wavefront *wf = gpuDynInst->wavefront();
5874 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5875 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5876 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5877
5878 src0.readSrc();
5879 src1.read();
5880
5881 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5882 if (wf->execMask(lane)) {
5883 vdst[lane] = std::fmin(src0[lane], src1[lane]);
5884 }
5885 }
5886
5887 vdst.write();
5888 }
5889
5890 Inst_VOP2__V_MAX_F32::Inst_VOP2__V_MAX_F32(InFmt_VOP2 *iFmt)
5891 : Inst_VOP2(iFmt, "v_max_f32")
5892 {
5893 setFlag(ALU);
5894 setFlag(F32);
5895 } // Inst_VOP2__V_MAX_F32
5896
5897 Inst_VOP2__V_MAX_F32::~Inst_VOP2__V_MAX_F32()
5898 {
5899 } // ~Inst_VOP2__V_MAX_F32
5900
5901 // D.f = (S0.f >= S1.f ? S0.f : S1.f).
5902 void
5903 Inst_VOP2__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
5904 {
5905 Wavefront *wf = gpuDynInst->wavefront();
5906 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
5907 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
5908 VecOperandF32 vdst(gpuDynInst, instData.VDST);
5909
5910 src0.readSrc();
5911 src1.read();
5912
5913 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5914 if (wf->execMask(lane)) {
5915 vdst[lane] = std::fmax(src0[lane], src1[lane]);
5916 }
5917 }
5918
5919 vdst.write();
5920 }
5921
5922 Inst_VOP2__V_MIN_I32::Inst_VOP2__V_MIN_I32(InFmt_VOP2 *iFmt)
5923 : Inst_VOP2(iFmt, "v_min_i32")
5924 {
5925 setFlag(ALU);
5926 } // Inst_VOP2__V_MIN_I32
5927
5928 Inst_VOP2__V_MIN_I32::~Inst_VOP2__V_MIN_I32()
5929 {
5930 } // ~Inst_VOP2__V_MIN_I32
5931
5932 // D.i = min(S0.i, S1.i).
5933 void
5934 Inst_VOP2__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
5935 {
5936 Wavefront *wf = gpuDynInst->wavefront();
5937 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5938 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5939 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5940
5941 src0.readSrc();
5942 src1.read();
5943
5944 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5945 if (wf->execMask(lane)) {
5946 vdst[lane] = std::min(src0[lane], src1[lane]);
5947 }
5948 }
5949
5950 vdst.write();
5951 }
5952
5953 Inst_VOP2__V_MAX_I32::Inst_VOP2__V_MAX_I32(InFmt_VOP2 *iFmt)
5954 : Inst_VOP2(iFmt, "v_max_i32")
5955 {
5956 setFlag(ALU);
5957 } // Inst_VOP2__V_MAX_I32
5958
5959 Inst_VOP2__V_MAX_I32::~Inst_VOP2__V_MAX_I32()
5960 {
5961 } // ~Inst_VOP2__V_MAX_I32
5962
5963 // D.i = max(S0.i, S1.i).
5964 void
5965 Inst_VOP2__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
5966 {
5967 Wavefront *wf = gpuDynInst->wavefront();
5968 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
5969 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
5970 VecOperandI32 vdst(gpuDynInst, instData.VDST);
5971
5972 src0.readSrc();
5973 src1.read();
5974
5975 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
5976 if (wf->execMask(lane)) {
5977 vdst[lane] = std::max(src0[lane], src1[lane]);
5978 }
5979 }
5980
5981 vdst.write();
5982 }
5983
5984 Inst_VOP2__V_MIN_U32::Inst_VOP2__V_MIN_U32(InFmt_VOP2 *iFmt)
5985 : Inst_VOP2(iFmt, "v_min_u32")
5986 {
5987 setFlag(ALU);
5988 } // Inst_VOP2__V_MIN_U32
5989
5990 Inst_VOP2__V_MIN_U32::~Inst_VOP2__V_MIN_U32()
5991 {
5992 } // ~Inst_VOP2__V_MIN_U32
5993
5994 // D.u = min(S0.u, S1.u).
5995 void
5996 Inst_VOP2__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
5997 {
5998 Wavefront *wf = gpuDynInst->wavefront();
5999 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6000 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6001 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6002
6003 src0.readSrc();
6004 src1.read();
6005
6006 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6007 if (wf->execMask(lane)) {
6008 vdst[lane] = std::min(src0[lane], src1[lane]);
6009 }
6010 }
6011
6012 vdst.write();
6013 }
6014
6015 Inst_VOP2__V_MAX_U32::Inst_VOP2__V_MAX_U32(InFmt_VOP2 *iFmt)
6016 : Inst_VOP2(iFmt, "v_max_u32")
6017 {
6018 setFlag(ALU);
6019 } // Inst_VOP2__V_MAX_U32
6020
6021 Inst_VOP2__V_MAX_U32::~Inst_VOP2__V_MAX_U32()
6022 {
6023 } // ~Inst_VOP2__V_MAX_U32
6024
6025 // D.u = max(S0.u, S1.u).
6026 void
6027 Inst_VOP2__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
6028 {
6029 Wavefront *wf = gpuDynInst->wavefront();
6030 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6031 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6032 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6033
6034 src0.readSrc();
6035 src1.read();
6036
6037 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6038 if (wf->execMask(lane)) {
6039 vdst[lane] = std::max(src0[lane], src1[lane]);
6040 }
6041 }
6042
6043 vdst.write();
6044 }
6045
6046 Inst_VOP2__V_LSHRREV_B32::Inst_VOP2__V_LSHRREV_B32(InFmt_VOP2 *iFmt)
6047 : Inst_VOP2(iFmt, "v_lshrrev_b32")
6048 {
6049 setFlag(ALU);
6050 } // Inst_VOP2__V_LSHRREV_B32
6051
6052 Inst_VOP2__V_LSHRREV_B32::~Inst_VOP2__V_LSHRREV_B32()
6053 {
6054 } // ~Inst_VOP2__V_LSHRREV_B32
6055
6056 // D.u = S1.u >> S0.u[4:0].
6057 // The vacated bits are set to zero.
6058 void
6059 Inst_VOP2__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
6060 {
6061 Wavefront *wf = gpuDynInst->wavefront();
6062 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6063 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6064 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6065
6066 src0.readSrc();
6067 src1.read();
6068
6069 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6070 if (wf->execMask(lane)) {
6071 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
6072 }
6073 }
6074
6075 vdst.write();
6076 }
6077
6078 Inst_VOP2__V_ASHRREV_I32::Inst_VOP2__V_ASHRREV_I32(InFmt_VOP2 *iFmt)
6079 : Inst_VOP2(iFmt, "v_ashrrev_i32")
6080 {
6081 setFlag(ALU);
6082 } // Inst_VOP2__V_ASHRREV_I32
6083
6084 Inst_VOP2__V_ASHRREV_I32::~Inst_VOP2__V_ASHRREV_I32()
6085 {
6086 } // ~Inst_VOP2__V_ASHRREV_I32
6087
6088 // D.i = signext(S1.i) >> S0.i[4:0].
6089 // The vacated bits are set to the sign bit of the input value.
6090 void
6091 Inst_VOP2__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
6092 {
6093 Wavefront *wf = gpuDynInst->wavefront();
6094 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6095 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
6096 VecOperandI32 vdst(gpuDynInst, instData.VDST);
6097
6098 src0.readSrc();
6099 src1.read();
6100
6101 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6102 if (wf->execMask(lane)) {
6103 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
6104 }
6105 }
6106
6107 vdst.write();
6108 }
6109
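// Editorial sketch: the operand types are what distinguish this shift
// from v_lshrrev_b32. With the same bit pattern, the unsigned element
// there shifts in zeros, while the signed element here shifts in copies
// of the sign bit (an arithmetic shift on the two's-complement targets
// gem5 runs on):
static inline VecElemI32
ashrSketch(VecElemI32 s1, VecElemU32 s0)
{
    return s1 >> bits(s0, 4, 0); // e.g. ashrSketch(-16, 2) == -4
}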
6110 Inst_VOP2__V_LSHLREV_B32::Inst_VOP2__V_LSHLREV_B32(InFmt_VOP2 *iFmt)
6111 : Inst_VOP2(iFmt, "v_lshlrev_b32")
6112 {
6113 setFlag(ALU);
6114 } // Inst_VOP2__V_LSHLREV_B32
6115
6116 Inst_VOP2__V_LSHLREV_B32::~Inst_VOP2__V_LSHLREV_B32()
6117 {
6118 } // ~Inst_VOP2__V_LSHLREV_B32
6119
6120 // D.u = S1.u << S0.u[4:0].
6121 void
6122 Inst_VOP2__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
6123 {
6124 Wavefront *wf = gpuDynInst->wavefront();
6125 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6126 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
6127 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6128
6129 src0.readSrc();
6130 src1.read();
6131
6132 if (isSDWAInst()) {
6133 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
6134 // use copies of original src0, src1, and vdst during selecting
6135 VecOperandU32 origSrc0_sdwa(gpuDynInst,
6136 extData.iFmt_VOP_SDWA.SRC0);
6137 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
6138 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
6139
6140 src0_sdwa.read();
6141 origSrc0_sdwa.read();
6142 origSrc1.read();
6143
6144 DPRINTF(GCN3, "Handling V_LSHLREV_B32 SRC SDWA. SRC0: register "
6145 "v[%d], DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: "
6146 "%d, SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: "
6147 "%d, SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
6148 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
6149 extData.iFmt_VOP_SDWA.DST_UNUSED,
6150 extData.iFmt_VOP_SDWA.CLAMP,
6151 extData.iFmt_VOP_SDWA.SRC0_SEL,
6152 extData.iFmt_VOP_SDWA.SRC0_SEXT,
6153 extData.iFmt_VOP_SDWA.SRC0_NEG,
6154 extData.iFmt_VOP_SDWA.SRC0_ABS,
6155 extData.iFmt_VOP_SDWA.SRC1_SEL,
6156 extData.iFmt_VOP_SDWA.SRC1_SEXT,
6157 extData.iFmt_VOP_SDWA.SRC1_NEG,
6158 extData.iFmt_VOP_SDWA.SRC1_ABS);
6159
6160 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
6161 src1, origSrc1);
6162
6163 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6164 if (wf->execMask(lane)) {
6165 vdst[lane] = src1[lane] << bits(src0_sdwa[lane], 4, 0);
6166 origVdst[lane] = vdst[lane]; // keep copy consistent
6167 }
6168 }
6169
6170 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
6171 } else {
6172 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6173 if (wf->execMask(lane)) {
6174 vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
6175 }
6176 }
6177 }
6178
6179 vdst.write();
6180 }
6181
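// Hedged sketch of the sub-dword selection that processSDWA_src applies
// before the shift above (the real logic lives in inst_util.hh and also
// handles word selects, abs/neg, and the unused-bits policy). byteIdx
// here is an illustrative byte index, not the actual SDWA SEL encoding:
static inline VecElemU32
sdwaByteSelSketch(VecElemU32 operand, int byteIdx, bool signExtend)
{
    VecElemU32 byte = bits(operand, 8 * byteIdx + 7, 8 * byteIdx);
    return signExtend ? (VecElemU32)sext<8>(byte) : byte;
}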
6182 Inst_VOP2__V_AND_B32::Inst_VOP2__V_AND_B32(InFmt_VOP2 *iFmt)
6183 : Inst_VOP2(iFmt, "v_and_b32")
6184 {
6185 setFlag(ALU);
6186 } // Inst_VOP2__V_AND_B32
6187
6188 Inst_VOP2__V_AND_B32::~Inst_VOP2__V_AND_B32()
6189 {
6190 } // ~Inst_VOP2__V_AND_B32
6191
6192 // D.u = S0.u & S1.u.
6193 // Input and output modifiers not supported.
6194 void
6195 Inst_VOP2__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
6196 {
6197 Wavefront *wf = gpuDynInst->wavefront();
6198 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6199 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6200 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6201
6202 src0.readSrc();
6203 src1.read();
6204
6205 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6206 if (wf->execMask(lane)) {
6207 vdst[lane] = src0[lane] & src1[lane];
6208 }
6209 }
6210
6211 vdst.write();
6212 }
6213
6214 Inst_VOP2__V_OR_B32::Inst_VOP2__V_OR_B32(InFmt_VOP2 *iFmt)
6215 : Inst_VOP2(iFmt, "v_or_b32")
6216 {
6217 setFlag(ALU);
6218 } // Inst_VOP2__V_OR_B32
6219
6220 Inst_VOP2__V_OR_B32::~Inst_VOP2__V_OR_B32()
6221 {
6222 } // ~Inst_VOP2__V_OR_B32
6223
6224 // D.u = S0.u | S1.u.
6225 // Input and output modifiers not supported.
6226 void
6227 Inst_VOP2__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
6228 {
6229 Wavefront *wf = gpuDynInst->wavefront();
6230 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6231 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
6232 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6233
6234 src0.readSrc();
6235 src1.read();
6236
6237 if (isSDWAInst()) {
6238 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
6239 // use copies of original src0, src1, and dest during selecting
6240 VecOperandU32 origSrc0_sdwa(gpuDynInst,
6241 extData.iFmt_VOP_SDWA.SRC0);
6242 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
6243 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
6244
6245 src0_sdwa.read();
6246 origSrc0_sdwa.read();
6247 origSrc1.read();
6248
6249 DPRINTF(GCN3, "Handling V_OR_B32 SRC SDWA. SRC0: register v[%d], "
6250 "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
6251 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
6252 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
6253 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
6254 extData.iFmt_VOP_SDWA.DST_UNUSED,
6255 extData.iFmt_VOP_SDWA.CLAMP,
6256 extData.iFmt_VOP_SDWA.SRC0_SEL,
6257 extData.iFmt_VOP_SDWA.SRC0_SEXT,
6258 extData.iFmt_VOP_SDWA.SRC0_NEG,
6259 extData.iFmt_VOP_SDWA.SRC0_ABS,
6260 extData.iFmt_VOP_SDWA.SRC1_SEL,
6261 extData.iFmt_VOP_SDWA.SRC1_SEXT,
6262 extData.iFmt_VOP_SDWA.SRC1_NEG,
6263 extData.iFmt_VOP_SDWA.SRC1_ABS);
6264
6265 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
6266 src1, origSrc1);
6267
6268 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6269 if (wf->execMask(lane)) {
6270 vdst[lane] = src0_sdwa[lane] | src1[lane];
6271 origVdst[lane] = vdst[lane]; // keep copy consistent
6272 }
6273 }
6274
6275 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
6276 } else {
6277 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6278 if (wf->execMask(lane)) {
6279 vdst[lane] = src0[lane] | src1[lane];
6280 }
6281 }
6282 }
6283
6284 vdst.write();
6285 }
6286
6287 Inst_VOP2__V_XOR_B32::Inst_VOP2__V_XOR_B32(InFmt_VOP2 *iFmt)
6288 : Inst_VOP2(iFmt, "v_xor_b32")
6289 {
6290 setFlag(ALU);
6291 } // Inst_VOP2__V_XOR_B32
6292
6293 Inst_VOP2__V_XOR_B32::~Inst_VOP2__V_XOR_B32()
6294 {
6295 } // ~Inst_VOP2__V_XOR_B32
6296
6297 // D.u = S0.u ^ S1.u.
6298 // Input and output modifiers not supported.
6299 void
6300 Inst_VOP2__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
6301 {
6302 Wavefront *wf = gpuDynInst->wavefront();
6303 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6304 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6305 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6306
6307 src0.readSrc();
6308 src1.read();
6309
6310 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6311 if (wf->execMask(lane)) {
6312 vdst[lane] = src0[lane] ^ src1[lane];
6313 }
6314 }
6315
6316 vdst.write();
6317 }
6318
6319 Inst_VOP2__V_MAC_F32::Inst_VOP2__V_MAC_F32(InFmt_VOP2 *iFmt)
6320 : Inst_VOP2(iFmt, "v_mac_f32")
6321 {
6322 setFlag(ALU);
6323 setFlag(F32);
6324 setFlag(MAC);
6325 } // Inst_VOP2__V_MAC_F32
6326
6327 Inst_VOP2__V_MAC_F32::~Inst_VOP2__V_MAC_F32()
6328 {
6329 } // ~Inst_VOP2__V_MAC_F32
6330
6331 // D.f = S0.f * S1.f + D.f.
6332 void
6333 Inst_VOP2__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
6334 {
6335 Wavefront *wf = gpuDynInst->wavefront();
6336 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
6337 VecOperandF32 src1(gpuDynInst, instData.VSRC1);
6338 VecOperandF32 vdst(gpuDynInst, instData.VDST);
6339
6340 src0.readSrc();
6341 src1.read();
6342 vdst.read();
6343
6344 if (isDPPInst()) {
6345 VecOperandF32 src0_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
6346 src0_dpp.read();
6347
6348 DPRINTF(GCN3, "Handling V_MAC_F32 SRC DPP. SRC0: register v[%d], "
6349 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
6350 "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
6351 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
6352 extData.iFmt_VOP_DPP.DPP_CTRL,
6353 extData.iFmt_VOP_DPP.SRC0_ABS,
6354 extData.iFmt_VOP_DPP.SRC0_NEG,
6355 extData.iFmt_VOP_DPP.SRC1_ABS,
6356 extData.iFmt_VOP_DPP.SRC1_NEG,
6357 extData.iFmt_VOP_DPP.BOUND_CTRL,
6358 extData.iFmt_VOP_DPP.BANK_MASK,
6359 extData.iFmt_VOP_DPP.ROW_MASK);
6360
6361 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src0_dpp, src1);
6362
6363 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6364 if (wf->execMask(lane)) {
6365 vdst[lane] = std::fma(src0_dpp[lane], src1[lane],
6366 vdst[lane]);
6367 }
6368 }
6369 } else {
6370 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6371 if (wf->execMask(lane)) {
6372 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
6373 }
6374 }
6375 }
6376
6377 vdst.write();
6378 }
6379
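// The std::fma above matters numerically: the multiply and add are
// rounded once rather than twice. A sketch of how the fused and
// unfused forms can differ for float inputs:
static inline bool
fmaDiffersSketch(VecElemF32 a, VecElemF32 b, VecElemF32 c)
{
    // true whenever rounding the intermediate product changes the sum
    return std::fma(a, b, c) != (a * b + c);
}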
6380 Inst_VOP2__V_MADMK_F32::Inst_VOP2__V_MADMK_F32(InFmt_VOP2 *iFmt)
6381 : Inst_VOP2(iFmt, "v_madmk_f32")
6382 {
6383 setFlag(ALU);
6384 setFlag(F32);
6385 setFlag(MAD);
6386 } // Inst_VOP2__V_MADMK_F32
6387
6388 Inst_VOP2__V_MADMK_F32::~Inst_VOP2__V_MADMK_F32()
6389 {
6390 } // ~Inst_VOP2__V_MADMK_F32
6391
6392 // D.f = S0.f * K + S1.f; K is a 32-bit inline constant.
6393 // This opcode cannot use the input/output modifiers.
6394 void
6395 Inst_VOP2__V_MADMK_F32::execute(GPUDynInstPtr gpuDynInst)
6396 {
6397 Wavefront *wf = gpuDynInst->wavefront();
6398 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
6399 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
6400 VecOperandF32 vdst(gpuDynInst, instData.VDST);
6401 VecElemF32 k = extData.imm_f32;
6402
6403 src0.readSrc();
6404 src1.read();
6405
6406 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6407 if (wf->execMask(lane)) {
6408 vdst[lane] = std::fma(src0[lane], k, src1[lane]);
6409 }
6410 }
6411
6412 vdst.write();
6413 }
6414
6415 Inst_VOP2__V_MADAK_F32::Inst_VOP2__V_MADAK_F32(InFmt_VOP2 *iFmt)
6416 : Inst_VOP2(iFmt, "v_madak_f32")
6417 {
6418 setFlag(ALU);
6419 setFlag(F32);
6420 setFlag(MAD);
6421 } // Inst_VOP2__V_MADAK_F32
6422
6423 Inst_VOP2__V_MADAK_F32::~Inst_VOP2__V_MADAK_F32()
6424 {
6425 } // ~Inst_VOP2__V_MADAK_F32
6426
6427 // D.f = S0.f * S1.f + K; K is a 32-bit inline constant.
6428 // This opcode cannot use input/output modifiers.
6429 void
6430 Inst_VOP2__V_MADAK_F32::execute(GPUDynInstPtr gpuDynInst)
6431 {
6432 Wavefront *wf = gpuDynInst->wavefront();
6433 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
6434 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
6435 VecOperandF32 vdst(gpuDynInst, instData.VDST);
6436 VecElemF32 k = extData.imm_f32;
6437
6438 src0.readSrc();
6439 src1.read();
6440
6441 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6442 if (wf->execMask(lane)) {
6443 vdst[lane] = std::fma(src0[lane], src1[lane], k);
6444 }
6445 }
6446
6447 vdst.write();
6448 }
6449
6450 Inst_VOP2__V_ADD_U32::Inst_VOP2__V_ADD_U32(InFmt_VOP2 *iFmt)
6451 : Inst_VOP2(iFmt, "v_add_u32")
6452 {
6453 setFlag(ALU);
6454 setFlag(WritesVCC);
6455 } // Inst_VOP2__V_ADD_U32
6456
6457 Inst_VOP2__V_ADD_U32::~Inst_VOP2__V_ADD_U32()
6458 {
6459 } // ~Inst_VOP2__V_ADD_U32
6460
6461 // D.u = S0.u + S1.u;
6462 // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
6463 // overflow or carry-out.
6464 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
6465 void
6466 Inst_VOP2__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
6467 {
6468 Wavefront *wf = gpuDynInst->wavefront();
6469 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6470 VecOperandU32 src1(gpuDynInst, instData.VSRC1);
6471 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6472 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6473
6474 src0.readSrc();
6475 src1.read();
6476
6477 if (isSDWAInst()) {
6478 VecOperandU32 src0_sdwa(gpuDynInst, extData.iFmt_VOP_SDWA.SRC0);
6479 // use copies of original src0, src1, and dest during selecting
6480 VecOperandU32 origSrc0_sdwa(gpuDynInst,
6481 extData.iFmt_VOP_SDWA.SRC0);
6482 VecOperandU32 origSrc1(gpuDynInst, instData.VSRC1);
6483 VecOperandU32 origVdst(gpuDynInst, instData.VDST);
6484
6485 src0_sdwa.read();
6486 origSrc0_sdwa.read();
6487 origSrc1.read();
6488
6489 DPRINTF(GCN3, "Handling V_ADD_U32 SRC SDWA. SRC0: register v[%d], "
6490 "DST_SEL: %d, DST_UNUSED: %d, CLAMP: %d, SRC0_SEL: %d, "
6491 "SRC0_SEXT: %d, SRC0_NEG: %d, SRC0_ABS: %d, SRC1_SEL: %d, "
6492 "SRC1_SEXT: %d, SRC1_NEG: %d, SRC1_ABS: %d\n",
6493 extData.iFmt_VOP_SDWA.SRC0, extData.iFmt_VOP_SDWA.DST_SEL,
6494 extData.iFmt_VOP_SDWA.DST_UNUSED,
6495 extData.iFmt_VOP_SDWA.CLAMP,
6496 extData.iFmt_VOP_SDWA.SRC0_SEL,
6497 extData.iFmt_VOP_SDWA.SRC0_SEXT,
6498 extData.iFmt_VOP_SDWA.SRC0_NEG,
6499 extData.iFmt_VOP_SDWA.SRC0_ABS,
6500 extData.iFmt_VOP_SDWA.SRC1_SEL,
6501 extData.iFmt_VOP_SDWA.SRC1_SEXT,
6502 extData.iFmt_VOP_SDWA.SRC1_NEG,
6503 extData.iFmt_VOP_SDWA.SRC1_ABS);
6504
6505 processSDWA_src(extData.iFmt_VOP_SDWA, src0_sdwa, origSrc0_sdwa,
6506 src1, origSrc1);
6507
6508 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6509 if (wf->execMask(lane)) {
6510 vdst[lane] = src0_sdwa[lane] + src1[lane];
6511 origVdst[lane] = vdst[lane]; // keep copy consistent
6512 vcc.setBit(lane, ((VecElemU64)src0_sdwa[lane]
6513 + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
6514 }
6515 }
6516
6517 processSDWA_dst(extData.iFmt_VOP_SDWA, vdst, origVdst);
6518 } else {
6519 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6520 if (wf->execMask(lane)) {
6521 vdst[lane] = src0[lane] + src1[lane];
6522 vcc.setBit(lane, ((VecElemU64)src0[lane]
6523 + (VecElemU64)src1[lane] >= 0x100000000ULL) ? 1 : 0);
6524 }
6525 }
6526 }
6527
6528 vcc.write();
6529 vdst.write();
6530 }
6531
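// Sketch of the per-lane carry-out computed above: widening both
// addends to 64 bits makes the carry exactly the condition that the
// true sum reaches 2^32:
static inline bool
addCarryOutSketch(VecElemU32 a, VecElemU32 b)
{
    return ((VecElemU64)a + (VecElemU64)b) >= 0x100000000ULL;
}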
6532 Inst_VOP2__V_SUB_U32::Inst_VOP2__V_SUB_U32(InFmt_VOP2 *iFmt)
6533 : Inst_VOP2(iFmt, "v_sub_u32")
6534 {
6535 setFlag(ALU);
6536 setFlag(WritesVCC);
6537 } // Inst_VOP2__V_SUB_U32
6538
6539 Inst_VOP2__V_SUB_U32::~Inst_VOP2__V_SUB_U32()
6540 {
6541 } // ~Inst_VOP2__V_SUB_U32
6542
6543 // D.u = S0.u - S1.u;
6544 // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
6545 // carry-out.
6546 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
6547 void
6548 Inst_VOP2__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
6549 {
6550 Wavefront *wf = gpuDynInst->wavefront();
6551 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6552 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6553 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6554 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6555
6556 src0.readSrc();
6557 src1.read();
6558
6559 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6560 if (wf->execMask(lane)) {
6561 vdst[lane] = src0[lane] - src1[lane];
6562 vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
6563 }
6564 }
6565
6566 vdst.write();
6567 vcc.write();
6568 }
6569
6570 Inst_VOP2__V_SUBREV_U32::Inst_VOP2__V_SUBREV_U32(InFmt_VOP2 *iFmt)
6571 : Inst_VOP2(iFmt, "v_subrev_u32")
6572 {
6573 setFlag(ALU);
6574 setFlag(WritesVCC);
6575 } // Inst_VOP2__V_SUBREV_U32
6576
6577 Inst_VOP2__V_SUBREV_U32::~Inst_VOP2__V_SUBREV_U32()
6578 {
6579 } // ~Inst_VOP2__V_SUBREV_U32
6580
6581 // D.u = S1.u - S0.u;
6582 // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
6583 // carry-out.
6584 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
6585 void
6586 Inst_VOP2__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
6587 {
6588 Wavefront *wf = gpuDynInst->wavefront();
6589 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6590 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6591 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6592 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6593
6594 src0.readSrc();
6595 src1.read();
6596
6597 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6598 if (wf->execMask(lane)) {
6599 vdst[lane] = src1[lane] - src0[lane];
6600 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
6601 }
6602 }
6603
6604 vdst.write();
6605 vcc.write();
6606 }
6607
6608 Inst_VOP2__V_ADDC_U32::Inst_VOP2__V_ADDC_U32(InFmt_VOP2 *iFmt)
6609 : Inst_VOP2(iFmt, "v_addc_u32")
6610 {
6611 setFlag(ALU);
6612 setFlag(WritesVCC);
6613 setFlag(ReadsVCC);
6614 } // Inst_VOP2__V_ADDC_U32
6615
6616 Inst_VOP2__V_ADDC_U32::~Inst_VOP2__V_ADDC_U32()
6617 {
6618 } // ~Inst_VOP2__V_ADDC_U32
6619
6620 // D.u = S0.u + S1.u + VCC[threadId];
6621 // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
6622 // is an UNSIGNED overflow.
6623 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
6624 // source comes from the SGPR-pair at S2.u.
6625 void
6626 Inst_VOP2__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
6627 {
6628 Wavefront *wf = gpuDynInst->wavefront();
6629 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6630 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6631 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6632 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6633
6634 src0.readSrc();
6635 src1.read();
6636 vcc.read();
6637
6638 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6639 if (wf->execMask(lane)) {
6640 vdst[lane] = src0[lane] + src1[lane]
6641 + bits(vcc.rawData(), lane);
6642 vcc.setBit(lane, ((VecElemU64)src0[lane]
6643 + (VecElemU64)src1[lane]
6644 + (VecElemU64)bits(vcc.rawData(), lane, lane))
6645 >= 0x100000000ULL ? 1 : 0);
6646 }
6647 }
6648
6649 vdst.write();
6650 vcc.write();
6651 }
6652
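// v_add_u32 and v_addc_u32 chain through VCC to build wider adds; a
// hedged scalar model of a 64-bit add split into 32-bit halves:
static inline VecElemU64
add64ChainSketch(VecElemU32 aLo, VecElemU32 aHi,
                 VecElemU32 bLo, VecElemU32 bHi)
{
    VecElemU64 lo = (VecElemU64)aLo + bLo;        // v_add_u32
    VecElemU32 carry = (VecElemU32)(lo >> 32);    // carry-out into VCC
    VecElemU32 hi = aHi + bHi + carry;            // v_addc_u32
    return ((VecElemU64)hi << 32) | (VecElemU32)lo;
}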
6653 Inst_VOP2__V_SUBB_U32::Inst_VOP2__V_SUBB_U32(InFmt_VOP2 *iFmt)
6654 : Inst_VOP2(iFmt, "v_subb_u32")
6655 {
6656 setFlag(ALU);
6657 setFlag(WritesVCC);
6658 setFlag(ReadsVCC);
6659 } // Inst_VOP2__V_SUBB_U32
6660
6661 Inst_VOP2__V_SUBB_U32::~Inst_VOP2__V_SUBB_U32()
6662 {
6663 } // ~Inst_VOP2__V_SUBB_U32
6664
6665 // D.u = S0.u - S1.u - VCC[threadId];
6666 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
6667 // overflow.
6668 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
6669 // source comes from the SGPR-pair at S2.u.
6670 void
6671 Inst_VOP2__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
6672 {
6673 Wavefront *wf = gpuDynInst->wavefront();
6674 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6675 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6676 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6677 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6678
6679 src0.readSrc();
6680 src1.read();
6681 vcc.read();
6682
6683 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6684 if (wf->execMask(lane)) {
6685 vdst[lane]
6686 = src0[lane] - src1[lane] - bits(vcc.rawData(), lane);
6687 vcc.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
6688 > src0[lane] ? 1 : 0);
6689 }
6690 }
6691
6692 vdst.write();
6693 vcc.write();
6694 }
6695
6696 Inst_VOP2__V_SUBBREV_U32::Inst_VOP2__V_SUBBREV_U32(InFmt_VOP2 *iFmt)
6697 : Inst_VOP2(iFmt, "v_subbrev_u32")
6698 {
6699 setFlag(ALU);
6700 setFlag(WritesVCC);
6701 setFlag(ReadsVCC);
6702 } // Inst_VOP2__V_SUBBREV_U32
6703
6704 Inst_VOP2__V_SUBBREV_U32::~Inst_VOP2__V_SUBBREV_U32()
6705 {
6706 } // ~Inst_VOP2__V_SUBBREV_U32
6707
6708 // D.u = S1.u - S0.u - VCC[threadId];
6709 // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
6710 // overflow.
6711 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
6712 // source comes from the SGPR-pair at S2.u.
6713 void
6714 Inst_VOP2__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
6715 {
6716 Wavefront *wf = gpuDynInst->wavefront();
6717 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
6718 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
6719 VecOperandU32 vdst(gpuDynInst, instData.VDST);
6720 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
6721
6722 src0.readSrc();
6723 src1.read();
6724 vcc.read();
6725
6726 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6727 if (wf->execMask(lane)) {
6728 vdst[lane]
6729 = src1[lane] - src0[lane] - bits(vcc.rawData(), lane);
6730 vcc.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
6731 > src1[lane] ? 1 : 0);
6732 }
6733 }
6734
6735 vdst.write();
6736 vcc.write();
6737 }
6738
6739 Inst_VOP2__V_ADD_F16::Inst_VOP2__V_ADD_F16(InFmt_VOP2 *iFmt)
6740 : Inst_VOP2(iFmt, "v_add_f16")
6741 {
6742 setFlag(ALU);
6743 setFlag(F16);
6744 } // Inst_VOP2__V_ADD_F16
6745
6746 Inst_VOP2__V_ADD_F16::~Inst_VOP2__V_ADD_F16()
6747 {
6748 } // ~Inst_VOP2__V_ADD_F16
6749
6750 // D.f16 = S0.f16 + S1.f16.
6751 void
6752 Inst_VOP2__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
6753 {
6754 panicUnimplemented();
6755 }
6756
6757 Inst_VOP2__V_SUB_F16::Inst_VOP2__V_SUB_F16(InFmt_VOP2 *iFmt)
6758 : Inst_VOP2(iFmt, "v_sub_f16")
6759 {
6760 setFlag(ALU);
6761 setFlag(F16);
6762 } // Inst_VOP2__V_SUB_F16
6763
6764 Inst_VOP2__V_SUB_F16::~Inst_VOP2__V_SUB_F16()
6765 {
6766 } // ~Inst_VOP2__V_SUB_F16
6767
6768 // D.f16 = S0.f16 - S1.f16.
6769 void
6770 Inst_VOP2__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
6771 {
6772 panicUnimplemented();
6773 }
6774
6775 Inst_VOP2__V_SUBREV_F16::Inst_VOP2__V_SUBREV_F16(InFmt_VOP2 *iFmt)
6776 : Inst_VOP2(iFmt, "v_subrev_f16")
6777 {
6778 setFlag(ALU);
6779 setFlag(F16);
6780 } // Inst_VOP2__V_SUBREV_F16
6781
6782 Inst_VOP2__V_SUBREV_F16::~Inst_VOP2__V_SUBREV_F16()
6783 {
6784 } // ~Inst_VOP2__V_SUBREV_F16
6785
6786 // D.f16 = S1.f16 - S0.f16.
6787 void
6788 Inst_VOP2__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
6789 {
6790 panicUnimplemented();
6791 }
6792
6793 Inst_VOP2__V_MUL_F16::Inst_VOP2__V_MUL_F16(InFmt_VOP2 *iFmt)
6794 : Inst_VOP2(iFmt, "v_mul_f16")
6795 {
6796 setFlag(ALU);
6797 setFlag(F16);
6798 } // Inst_VOP2__V_MUL_F16
6799
6800 Inst_VOP2__V_MUL_F16::~Inst_VOP2__V_MUL_F16()
6801 {
6802 } // ~Inst_VOP2__V_MUL_F16
6803
6804 // D.f16 = S0.f16 * S1.f16.
6805 void
6806 Inst_VOP2__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
6807 {
6808 panicUnimplemented();
6809 }
6810
6811 Inst_VOP2__V_MAC_F16::Inst_VOP2__V_MAC_F16(InFmt_VOP2 *iFmt)
6812 : Inst_VOP2(iFmt, "v_mac_f16")
6813 {
6814 setFlag(ALU);
6815 setFlag(F16);
6816 setFlag(MAC);
6817 } // Inst_VOP2__V_MAC_F16
6818
6819 Inst_VOP2__V_MAC_F16::~Inst_VOP2__V_MAC_F16()
6820 {
6821 } // ~Inst_VOP2__V_MAC_F16
6822
6823 // D.f16 = S0.f16 * S1.f16 + D.f16.
6824 void
6825 Inst_VOP2__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
6826 {
6827 panicUnimplemented();
6828 }
6829
6830 Inst_VOP2__V_MADMK_F16::Inst_VOP2__V_MADMK_F16(InFmt_VOP2 *iFmt)
6831 : Inst_VOP2(iFmt, "v_madmk_f16")
6832 {
6833 setFlag(ALU);
6834 setFlag(F16);
6835 setFlag(MAD);
6836 } // Inst_VOP2__V_MADMK_F16
6837
6838 Inst_VOP2__V_MADMK_F16::~Inst_VOP2__V_MADMK_F16()
6839 {
6840 } // ~Inst_VOP2__V_MADMK_F16
6841
6842 // D.f16 = S0.f16 * K.f16 + S1.f16; K is a 16-bit inline constant stored
6843 // in the following literal DWORD.
6844 // This opcode cannot use the VOP3 encoding and cannot use input/output
6845 // modifiers.
6846 void
6847 Inst_VOP2__V_MADMK_F16::execute(GPUDynInstPtr gpuDynInst)
6848 {
6849 panicUnimplemented();
6850 }
6851
6852 Inst_VOP2__V_MADAK_F16::Inst_VOP2__V_MADAK_F16(InFmt_VOP2 *iFmt)
6853 : Inst_VOP2(iFmt, "v_madak_f16")
6854 {
6855 setFlag(ALU);
6856 setFlag(F16);
6857 setFlag(MAD);
6858 } // Inst_VOP2__V_MADAK_F16
6859
6860 Inst_VOP2__V_MADAK_F16::~Inst_VOP2__V_MADAK_F16()
6861 {
6862 } // ~Inst_VOP2__V_MADAK_F16
6863
6864 // D.f16 = S0.f16 * S1.f16 + K.f16; K is a 16-bit inline constant stored
6865 // in the following literal DWORD.
6866 // This opcode cannot use the VOP3 encoding and cannot use input/output
6867 // modifiers.
6868 void
6869 Inst_VOP2__V_MADAK_F16::execute(GPUDynInstPtr gpuDynInst)
6870 {
6871 panicUnimplemented();
6872 }
6873
6874 Inst_VOP2__V_ADD_U16::Inst_VOP2__V_ADD_U16(InFmt_VOP2 *iFmt)
6875 : Inst_VOP2(iFmt, "v_add_u16")
6876 {
6877 setFlag(ALU);
6878 } // Inst_VOP2__V_ADD_U16
6879
6880 Inst_VOP2__V_ADD_U16::~Inst_VOP2__V_ADD_U16()
6881 {
6882 } // ~Inst_VOP2__V_ADD_U16
6883
6884 // D.u16 = S0.u16 + S1.u16.
6885 void
6886 Inst_VOP2__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
6887 {
6888 Wavefront *wf = gpuDynInst->wavefront();
6889 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6890 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6891 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6892
6893 src0.readSrc();
6894 src1.read();
6895
6896 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6897 if (wf->execMask(lane)) {
6898 vdst[lane] = src0[lane] + src1[lane];
6899 }
6900 }
6901
6902 vdst.write();
6903 }
6904
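// The 16-bit lanes above wrap modulo 2^16: the uint16 operands promote
// to int for the add and the store back into a VecElemU16 element
// truncates. A single-lane sketch:
static inline VecElemU16
addU16Sketch(VecElemU16 a, VecElemU16 b)
{
    return (VecElemU16)(a + b); // e.g. 0xFFFF + 2 -> 0x0001
}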
6905 Inst_VOP2__V_SUB_U16::Inst_VOP2__V_SUB_U16(InFmt_VOP2 *iFmt)
6906 : Inst_VOP2(iFmt, "v_sub_u16")
6907 {
6908 setFlag(ALU);
6909 } // Inst_VOP2__V_SUB_U16
6910
6911 Inst_VOP2__V_SUB_U16::~Inst_VOP2__V_SUB_U16()
6912 {
6913 } // ~Inst_VOP2__V_SUB_U16
6914
6915 // D.u16 = S0.u16 - S1.u16.
6916 void
6917 Inst_VOP2__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
6918 {
6919 Wavefront *wf = gpuDynInst->wavefront();
6920 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6921 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6922 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6923
6924 src0.readSrc();
6925 src1.read();
6926
6927 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6928 if (wf->execMask(lane)) {
6929 vdst[lane] = src0[lane] - src1[lane];
6930 }
6931 }
6932
6933 vdst.write();
6934 }
6935
6936 Inst_VOP2__V_SUBREV_U16::Inst_VOP2__V_SUBREV_U16(InFmt_VOP2 *iFmt)
6937 : Inst_VOP2(iFmt, "v_subrev_u16")
6938 {
6939 setFlag(ALU);
6940 } // Inst_VOP2__V_SUBREV_U16
6941
6942 Inst_VOP2__V_SUBREV_U16::~Inst_VOP2__V_SUBREV_U16()
6943 {
6944 } // ~Inst_VOP2__V_SUBREV_U16
6945
6946 // D.u16 = S1.u16 - S0.u16.
6947 void
6948 Inst_VOP2__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
6949 {
6950 Wavefront *wf = gpuDynInst->wavefront();
6951 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6952 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6953 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6954
6955 src0.readSrc();
6956 src1.read();
6957
6958 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6959 if (wf->execMask(lane)) {
6960 vdst[lane] = src1[lane] - src0[lane];
6961 }
6962 }
6963
6964 vdst.write();
6965 }
6966
6967 Inst_VOP2__V_MUL_LO_U16::Inst_VOP2__V_MUL_LO_U16(InFmt_VOP2 *iFmt)
6968 : Inst_VOP2(iFmt, "v_mul_lo_u16")
6969 {
6970 setFlag(ALU);
6971 } // Inst_VOP2__V_MUL_LO_U16
6972
6973 Inst_VOP2__V_MUL_LO_U16::~Inst_VOP2__V_MUL_LO_U16()
6974 {
6975 } // ~Inst_VOP2__V_MUL_LO_U16
6976
6977 // D.u16 = S0.u16 * S1.u16.
6978 void
6979 Inst_VOP2__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
6980 {
6981 Wavefront *wf = gpuDynInst->wavefront();
6982 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
6983 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
6984 VecOperandU16 vdst(gpuDynInst, instData.VDST);
6985
6986 src0.readSrc();
6987 src1.read();
6988
6989 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
6990 if (wf->execMask(lane)) {
6991 vdst[lane] = src0[lane] * src1[lane];
6992 }
6993 }
6994
6995 vdst.write();
6996 }
6997
6998 Inst_VOP2__V_LSHLREV_B16::Inst_VOP2__V_LSHLREV_B16(InFmt_VOP2 *iFmt)
6999 : Inst_VOP2(iFmt, "v_lshlrev_b16")
7000 {
7001 setFlag(ALU);
7002 } // Inst_VOP2__V_LSHLREV_B16
7003
7004 Inst_VOP2__V_LSHLREV_B16::~Inst_VOP2__V_LSHLREV_B16()
7005 {
7006 } // ~Inst_VOP2__V_LSHLREV_B16
7007
7008 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
7009 void
7010 Inst_VOP2__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
7011 {
7012 Wavefront *wf = gpuDynInst->wavefront();
7013 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7014 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7015 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7016
7017 src0.readSrc();
7018 src1.read();
7019
7020 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7021 if (wf->execMask(lane)) {
7022 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
7023 }
7024 }
7025
7026 vdst.write();
7027 }
7028
7029 Inst_VOP2__V_LSHRREV_B16::Inst_VOP2__V_LSHRREV_B16(InFmt_VOP2 *iFmt)
7030 : Inst_VOP2(iFmt, "v_lshrrev_b16")
7031 {
7032 setFlag(ALU);
7033 } // Inst_VOP2__V_LSHRREV_B16
7034
7035 Inst_VOP2__V_LSHRREV_B16::~Inst_VOP2__V_LSHRREV_B16()
7036 {
7037 } // ~Inst_VOP2__V_LSHRREV_B16
7038
7039 // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
7040 // The vacated bits are set to zero.
7041 void
7042 Inst_VOP2__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
7043 {
7044 Wavefront *wf = gpuDynInst->wavefront();
7045 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7046 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7047 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7048
7049 src0.readSrc();
7050 src1.read();
7051
7052 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7053 if (wf->execMask(lane)) {
7054 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
7055 }
7056 }
7057
7058 vdst.write();
7059 }
7060
7061 Inst_VOP2__V_ASHRREV_I16::Inst_VOP2__V_ASHRREV_I16(InFmt_VOP2 *iFmt)
7062 : Inst_VOP2(iFmt, "v_ashrrev_i16")
7063 {
7064 setFlag(ALU);
7065 } // Inst_VOP2__V_ASHRREV_I16
7066
7067 Inst_VOP2__V_ASHRREV_I16::~Inst_VOP2__V_ASHRREV_I16()
7068 {
7069 } // ~Inst_VOP2__V_ASHRREV_I16
7070
7071 // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
7072 // The vacated bits are set to the sign bit of the input value.
7073 void
7074 Inst_VOP2__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
7075 {
7076 Wavefront *wf = gpuDynInst->wavefront();
7077 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7078 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
7079 VecOperandI16 vdst(gpuDynInst, instData.VDST);
7080
7081 src0.readSrc();
7082 src1.read();
7083
7084 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7085 if (wf->execMask(lane)) {
7086 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
7087 }
7088 }
7089
7090 vdst.write();
7091 }
7092
7093 Inst_VOP2__V_MAX_F16::Inst_VOP2__V_MAX_F16(InFmt_VOP2 *iFmt)
7094 : Inst_VOP2(iFmt, "v_max_f16")
7095 {
7096 setFlag(ALU);
7097 setFlag(F16);
7098 } // Inst_VOP2__V_MAX_F16
7099
7100 Inst_VOP2__V_MAX_F16::~Inst_VOP2__V_MAX_F16()
7101 {
7102 } // ~Inst_VOP2__V_MAX_F16
7103
7104 // D.f16 = max(S0.f16, S1.f16).
7105 void
7106 Inst_VOP2__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
7107 {
7108 panicUnimplemented();
7109 }
7110
7111 Inst_VOP2__V_MIN_F16::Inst_VOP2__V_MIN_F16(InFmt_VOP2 *iFmt)
7112 : Inst_VOP2(iFmt, "v_min_f16")
7113 {
7114 setFlag(ALU);
7115 setFlag(F16);
7116 } // Inst_VOP2__V_MIN_F16
7117
7118 Inst_VOP2__V_MIN_F16::~Inst_VOP2__V_MIN_F16()
7119 {
7120 } // ~Inst_VOP2__V_MIN_F16
7121
7122 // D.f16 = min(S0.f16, S1.f16).
7123 void
7124 Inst_VOP2__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
7125 {
7126 panicUnimplemented();
7127 }
7128
7129 Inst_VOP2__V_MAX_U16::Inst_VOP2__V_MAX_U16(InFmt_VOP2 *iFmt)
7130 : Inst_VOP2(iFmt, "v_max_u16")
7131 {
7132 setFlag(ALU);
7133 } // Inst_VOP2__V_MAX_U16
7134
7135 Inst_VOP2__V_MAX_U16::~Inst_VOP2__V_MAX_U16()
7136 {
7137 } // ~Inst_VOP2__V_MAX_U16
7138
7139 // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
7140 void
7141 Inst_VOP2__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
7142 {
7143 Wavefront *wf = gpuDynInst->wavefront();
7144 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7145 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7146 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7147
7148 src0.readSrc();
7149 src1.read();
7150
7151 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7152 if (wf->execMask(lane)) {
7153 vdst[lane] = std::max(src0[lane], src1[lane]);
7154 }
7155 }
7156
7157 vdst.write();
7158 }
7159
7160 Inst_VOP2__V_MAX_I16::Inst_VOP2__V_MAX_I16(InFmt_VOP2 *iFmt)
7161 : Inst_VOP2(iFmt, "v_max_i16")
7162 {
7163 setFlag(ALU);
7164 } // Inst_VOP2__V_MAX_I16
7165
7166 Inst_VOP2__V_MAX_I16::~Inst_VOP2__V_MAX_I16()
7167 {
7168 } // ~Inst_VOP2__V_MAX_I16
7169
7170 // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
7171 void
7172 Inst_VOP2__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
7173 {
7174 Wavefront *wf = gpuDynInst->wavefront();
7175 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
7176 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
7177 VecOperandI16 vdst(gpuDynInst, instData.VDST);
7178
7179 src0.readSrc();
7180 src1.read();
7181
7182 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7183 if (wf->execMask(lane)) {
7184 vdst[lane] = std::max(src0[lane], src1[lane]);
7185 }
7186 }
7187
7188 vdst.write();
7189 }
7190
7191 Inst_VOP2__V_MIN_U16::Inst_VOP2__V_MIN_U16(InFmt_VOP2 *iFmt)
7192 : Inst_VOP2(iFmt, "v_min_u16")
7193 {
7194 setFlag(ALU);
7195 } // Inst_VOP2__V_MIN_U16
7196
7197 Inst_VOP2__V_MIN_U16::~Inst_VOP2__V_MIN_U16()
7198 {
7199 } // ~Inst_VOP2__V_MIN_U16
7200
7201 // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
7202 void
7203 Inst_VOP2__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
7204 {
7205 Wavefront *wf = gpuDynInst->wavefront();
7206 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
7207 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
7208 VecOperandU16 vdst(gpuDynInst, instData.VDST);
7209
7210 src0.readSrc();
7211 src1.read();
7212
7213 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7214 if (wf->execMask(lane)) {
7215 vdst[lane] = std::min(src0[lane], src1[lane]);
7216 }
7217 }
7218
7219 vdst.write();
7220 }
7221
7222 Inst_VOP2__V_MIN_I16::Inst_VOP2__V_MIN_I16(InFmt_VOP2 *iFmt)
7223 : Inst_VOP2(iFmt, "v_min_i16")
7224 {
7225 setFlag(ALU);
7226 } // Inst_VOP2__V_MIN_I16
7227
7228 Inst_VOP2__V_MIN_I16::~Inst_VOP2__V_MIN_I16()
7229 {
7230 } // ~Inst_VOP2__V_MIN_I16
7231
7232 // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
7233 void
7234 Inst_VOP2__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
7235 {
7236 Wavefront *wf = gpuDynInst->wavefront();
7237 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
7238 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
7239 VecOperandI16 vdst(gpuDynInst, instData.VDST);
7240
7241 src0.readSrc();
7242 src1.read();
7243
7244 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7245 if (wf->execMask(lane)) {
7246 vdst[lane] = std::min(src0[lane], src1[lane]);
7247 }
7248 }
7249
7250 vdst.write();
7251 }
7252
7253 Inst_VOP2__V_LDEXP_F16::Inst_VOP2__V_LDEXP_F16(InFmt_VOP2 *iFmt)
7254 : Inst_VOP2(iFmt, "v_ldexp_f16")
7255 {
7256 setFlag(ALU);
7257 setFlag(F16);
7258 } // Inst_VOP2__V_LDEXP_F16
7259
7260 Inst_VOP2__V_LDEXP_F16::~Inst_VOP2__V_LDEXP_F16()
7261 {
7262 } // ~Inst_VOP2__V_LDEXP_F16
7263
7264 // D.f16 = S0.f16 * (2 ** S1.i16).
7265 void
7266 Inst_VOP2__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
7267 {
7268 panicUnimplemented();
7269 }
7270
7271 Inst_VOP1__V_NOP::Inst_VOP1__V_NOP(InFmt_VOP1 *iFmt)
7272 : Inst_VOP1(iFmt, "v_nop")
7273 {
7274 setFlag(Nop);
7275 setFlag(ALU);
7276 } // Inst_VOP1__V_NOP
7277
7278 Inst_VOP1__V_NOP::~Inst_VOP1__V_NOP()
7279 {
7280 } // ~Inst_VOP1__V_NOP
7281
7282 // Do nothing.
7283 void
7284 Inst_VOP1__V_NOP::execute(GPUDynInstPtr gpuDynInst)
7285 {
7286 }
7287
7288 Inst_VOP1__V_MOV_B32::Inst_VOP1__V_MOV_B32(InFmt_VOP1 *iFmt)
7289 : Inst_VOP1(iFmt, "v_mov_b32")
7290 {
7291 setFlag(ALU);
7292 } // Inst_VOP1__V_MOV_B32
7293
7294 Inst_VOP1__V_MOV_B32::~Inst_VOP1__V_MOV_B32()
7295 {
7296 } // ~Inst_VOP1__V_MOV_B32
7297
7298 // D.u = S0.u.
7299 // Input and output modifiers not supported; this is an untyped operation.
7300 void
7301 Inst_VOP1__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
7302 {
7303 Wavefront *wf = gpuDynInst->wavefront();
7304 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7305 VecOperandU32 vdst(gpuDynInst, instData.VDST);
7306
7307 src.readSrc();
7308
7309 if (isDPPInst()) {
7310 VecOperandU32 src_dpp(gpuDynInst, extData.iFmt_VOP_DPP.SRC0);
7311 src_dpp.read();
7312
7313 DPRINTF(GCN3, "Handling V_MOV_B32 SRC DPP. SRC0: register v[%d], "
7314 "DPP_CTRL: 0x%#x, SRC0_ABS: %d, SRC0_NEG: %d, "
7315 "SRC1_ABS: %d, SRC1_NEG: %d, BOUND_CTRL: %d, "
7316 "BANK_MASK: %d, ROW_MASK: %d\n", extData.iFmt_VOP_DPP.SRC0,
7317 extData.iFmt_VOP_DPP.DPP_CTRL,
7318 extData.iFmt_VOP_DPP.SRC0_ABS,
7319 extData.iFmt_VOP_DPP.SRC0_NEG,
7320 extData.iFmt_VOP_DPP.SRC1_ABS,
7321 extData.iFmt_VOP_DPP.SRC1_NEG,
7322 extData.iFmt_VOP_DPP.BOUND_CTRL,
7323 extData.iFmt_VOP_DPP.BANK_MASK,
7324 extData.iFmt_VOP_DPP.ROW_MASK);
7325
7326 // NOTE: For VOP1, there is no SRC1, so make sure we're not trying
7327 // to negate it or take the absolute value of it
7328 assert(!extData.iFmt_VOP_DPP.SRC1_ABS);
7329 assert(!extData.iFmt_VOP_DPP.SRC1_NEG);
7330 processDPP(gpuDynInst, extData.iFmt_VOP_DPP, src_dpp);
7331
7332 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7333 if (wf->execMask(lane)) {
7334 vdst[lane] = src_dpp[lane];
7335 }
7336 }
7337 } else {
7338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7339 if (wf->execMask(lane)) {
7340 vdst[lane] = src[lane];
7341 }
7342 }
7343 }
7344
7345 vdst.write();
7346 }
7347
7348 Inst_VOP1__V_READFIRSTLANE_B32::Inst_VOP1__V_READFIRSTLANE_B32(
7349 InFmt_VOP1 *iFmt)
7350 : Inst_VOP1(iFmt, "v_readfirstlane_b32")
7351 {
7352 setFlag(ALU);
7353 } // Inst_VOP1__V_READFIRSTLANE_B32
7354
7355 Inst_VOP1__V_READFIRSTLANE_B32::~Inst_VOP1__V_READFIRSTLANE_B32()
7356 {
7357 } // ~Inst_VOP1__V_READFIRSTLANE_B32
7358
7359 // Copy one VGPR value to one SGPR. D = SGPR destination, S0 = source data
7360 // (VGPR# or M0 for lds direct access), Lane# = FindFirst1fromLSB(exec)
7361 // (Lane# = 0 if exec is zero). Ignores exec mask for the access.
7362 // Input and output modifiers not supported; this is an untyped operation.
7363 void
7364 Inst_VOP1__V_READFIRSTLANE_B32::execute(GPUDynInstPtr gpuDynInst)
7365 {
7366 Wavefront *wf = gpuDynInst->wavefront();
7367 ScalarRegI32 src_lane(0);
7368 ScalarRegU64 exec_mask = wf->execMask().to_ullong();
7369 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7370 ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
7371
7372 src.readSrc();
7373
7374 if (exec_mask) {
7375 src_lane = findLsbSet(exec_mask);
7376 }
7377
7378 sdst = src[src_lane];
7379
7380 sdst.write();
7381 }
7382
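// Sketch of the lane selection above: findLsbSet returns the index of
// the least-significant set bit of the exec mask, i.e. the first active
// lane, and lane 0 is used when no lane is active:
static inline int
firstActiveLaneSketch(ScalarRegU64 execMask)
{
    return execMask ? findLsbSet(execMask) : 0;
}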
7383 Inst_VOP1__V_CVT_I32_F64::Inst_VOP1__V_CVT_I32_F64(InFmt_VOP1 *iFmt)
7384 : Inst_VOP1(iFmt, "v_cvt_i32_f64")
7385 {
7386 setFlag(ALU);
7387 setFlag(F64);
7388 } // Inst_VOP1__V_CVT_I32_F64
7389
7390 Inst_VOP1__V_CVT_I32_F64::~Inst_VOP1__V_CVT_I32_F64()
7391 {
7392 } // ~Inst_VOP1__V_CVT_I32_F64
7393
7394 // D.i = (int)S0.d.
7395 // Out-of-range floating point values (including infinity) saturate. NaN
7396 // is converted to 0.
7397 void
7398 Inst_VOP1__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
7399 {
7400 Wavefront *wf = gpuDynInst->wavefront();
7401 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
7402 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7403
7404 src.readSrc();
7405
7406 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7407 if (wf->execMask(lane)) {
7408 int exp;
7409 std::frexp(src[lane], &exp);
7410 if (std::isnan(src[lane])) {
7411 vdst[lane] = 0;
7412 } else if (std::isinf(src[lane]) || exp > 31) {
7413 if (std::signbit(src[lane])) {
7414 vdst[lane] = INT_MIN;
7415 } else {
7416 vdst[lane] = INT_MAX;
7417 }
7418 } else {
7419 vdst[lane] = (VecElemI32)src[lane];
7420 }
7421 }
7422 }
7423
7424 vdst.write();
7425 }
7426
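// The frexp range check above uses the identity 2^(exp-1) <= |x| <
// 2^exp: any finite double reporting exp <= 31 truncates to a value
// representable in an int32 (and -2^31, which reports exp == 32,
// saturates to the identical INT_MIN). A standalone form:
static inline bool
fitsInI32Sketch(VecElemF64 x)
{
    int exp;
    std::frexp(x, &exp);
    return !std::isnan(x) && !std::isinf(x) && exp <= 31;
}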
7427 Inst_VOP1__V_CVT_F64_I32::Inst_VOP1__V_CVT_F64_I32(InFmt_VOP1 *iFmt)
7428 : Inst_VOP1(iFmt, "v_cvt_f64_i32")
7429 {
7430 setFlag(ALU);
7431 setFlag(F64);
7432 } // Inst_VOP1__V_CVT_F64_I32
7433
7434 Inst_VOP1__V_CVT_F64_I32::~Inst_VOP1__V_CVT_F64_I32()
7435 {
7436 } // ~Inst_VOP1__V_CVT_F64_I32
7437
7438 // D.d = (double)S0.i.
7439 void
7440 Inst_VOP1__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
7441 {
7442 Wavefront *wf = gpuDynInst->wavefront();
7443 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
7444 VecOperandF64 vdst(gpuDynInst, instData.VDST);
7445
7446 src.readSrc();
7447
7448 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7449 if (wf->execMask(lane)) {
7450 vdst[lane] = (VecElemF64)src[lane];
7451 }
7452 }
7453
7454 vdst.write();
7455 }
7456
7457 Inst_VOP1__V_CVT_F32_I32::Inst_VOP1__V_CVT_F32_I32(InFmt_VOP1 *iFmt)
7458 : Inst_VOP1(iFmt, "v_cvt_f32_i32")
7459 {
7460 setFlag(ALU);
7461 setFlag(F32);
7462 } // Inst_VOP1__V_CVT_F32_I32
7463
7464 Inst_VOP1__V_CVT_F32_I32::~Inst_VOP1__V_CVT_F32_I32()
7465 {
7466 } // ~Inst_VOP1__V_CVT_F32_I32
7467
7468 // D.f = (float)S0.i.
7469 void
7470 Inst_VOP1__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
7471 {
7472 Wavefront *wf = gpuDynInst->wavefront();
7473 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
7474 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7475
7476 src.readSrc();
7477
7478 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7479 if (wf->execMask(lane)) {
7480 vdst[lane] = (VecElemF32)src[lane];
7481 }
7482 }
7483
7484 vdst.write();
7485 }
7486
7487 Inst_VOP1__V_CVT_F32_U32::Inst_VOP1__V_CVT_F32_U32(InFmt_VOP1 *iFmt)
7488 : Inst_VOP1(iFmt, "v_cvt_f32_u32")
7489 {
7490 setFlag(ALU);
7491 setFlag(F32);
7492 } // Inst_VOP1__V_CVT_F32_U32
7493
7494 Inst_VOP1__V_CVT_F32_U32::~Inst_VOP1__V_CVT_F32_U32()
7495 {
7496 } // ~Inst_VOP1__V_CVT_F32_U32
7497
7498 // D.f = (float)S0.u.
7499 void
7500 Inst_VOP1__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
7501 {
7502 Wavefront *wf = gpuDynInst->wavefront();
7503 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7504 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7505
7506 src.readSrc();
7507
7508 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7509 if (wf->execMask(lane)) {
7510 vdst[lane] = (VecElemF32)src[lane];
7511 }
7512 }
7513
7514 vdst.write();
7515 }
7516
7517 Inst_VOP1__V_CVT_U32_F32::Inst_VOP1__V_CVT_U32_F32(InFmt_VOP1 *iFmt)
7518 : Inst_VOP1(iFmt, "v_cvt_u32_f32")
7519 {
7520 setFlag(ALU);
7521 setFlag(F32);
7522 } // Inst_VOP1__V_CVT_U32_F32
7523
7524 Inst_VOP1__V_CVT_U32_F32::~Inst_VOP1__V_CVT_U32_F32()
7525 {
7526 } // ~Inst_VOP1__V_CVT_U32_F32
7527
7528 // D.u = (unsigned)S0.f.
7529 // Out-of-range floating point values (including infinity) saturate. NaN
7530 // is converted to 0.
7531 void
7532 Inst_VOP1__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
7533 {
7534 Wavefront *wf = gpuDynInst->wavefront();
7535 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7536 VecOperandU32 vdst(gpuDynInst, instData.VDST);
7537
7538 src.readSrc();
7539
7540 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7541 if (wf->execMask(lane)) {
7542 int exp;
7543 std::frexp(src[lane], &exp);
7544 if (std::isnan(src[lane])) {
7545 vdst[lane] = 0;
7546 } else if (std::signbit(src[lane])) {
7547 // negative values (including -inf) clamp to 0
7548 vdst[lane] = 0;
7549 } else if (std::isinf(src[lane]) || exp > 32) {
7550 // +inf and finite values >= 2^32 saturate
7551 vdst[lane] = UINT_MAX;
7552 } else {
7553 vdst[lane] = (VecElemU32)src[lane];
7554 }
7557 }
7558 }
7559
7560 vdst.write();
7561 }
7562
7563 Inst_VOP1__V_CVT_I32_F32::Inst_VOP1__V_CVT_I32_F32(InFmt_VOP1 *iFmt)
7564 : Inst_VOP1(iFmt, "v_cvt_i32_f32")
7565 {
7566 setFlag(ALU);
7567 setFlag(F32);
7568 } // Inst_VOP1__V_CVT_I32_F32
7569
7570 Inst_VOP1__V_CVT_I32_F32::~Inst_VOP1__V_CVT_I32_F32()
7571 {
7572 } // ~Inst_VOP1__V_CVT_I32_F32
7573
7574 // D.i = (int)S0.f.
7575 // Out-of-range floating point values (including infinity) saturate. NaN
7576 // is converted to 0.
7577 void
7578 Inst_VOP1__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
7579 {
7580 Wavefront *wf = gpuDynInst->wavefront();
7581 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7582 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7583
7584 src.readSrc();
7585
7586 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7587 if (wf->execMask(lane)) {
7588 int exp;
7589 std::frexp(src[lane], &exp);
7590 if (std::isnan(src[lane])) {
7591 vdst[lane] = 0;
7592 } else if (std::isinf(src[lane]) || exp > 31) {
7593 if (std::signbit(src[lane])) {
7594 vdst[lane] = INT_MIN;
7595 } else {
7596 vdst[lane] = INT_MAX;
7597 }
7598 } else {
7599 vdst[lane] = (VecElemI32)src[lane];
7600 }
7601 }
7602 }
7603
7604 vdst.write();
7605 }
7606
7607 Inst_VOP1__V_MOV_FED_B32::Inst_VOP1__V_MOV_FED_B32(InFmt_VOP1 *iFmt)
7608 : Inst_VOP1(iFmt, "v_mov_fed_b32")
7609 {
7610 setFlag(ALU);
7611 } // Inst_VOP1__V_MOV_FED_B32
7612
7613 Inst_VOP1__V_MOV_FED_B32::~Inst_VOP1__V_MOV_FED_B32()
7614 {
7615 } // ~Inst_VOP1__V_MOV_FED_B32
7616
7617 // D.u = S0.u;
7618 // Input and output modifiers not supported; this is an untyped operation.
7619 void
7620 Inst_VOP1__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
7621 {
7622 panicUnimplemented();
7623 }
7624
7625 Inst_VOP1__V_CVT_F16_F32::Inst_VOP1__V_CVT_F16_F32(InFmt_VOP1 *iFmt)
7626 : Inst_VOP1(iFmt, "v_cvt_f16_f32")
7627 {
7628 setFlag(ALU);
7629 setFlag(F32);
7630 } // Inst_VOP1__V_CVT_F16_F32
7631
7632 Inst_VOP1__V_CVT_F16_F32::~Inst_VOP1__V_CVT_F16_F32()
7633 {
7634 } // ~Inst_VOP1__V_CVT_F16_F32
7635
7636 // D.f16 = flt32_to_flt16(S0.f).
7637 void
7638 Inst_VOP1__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
7639 {
7640 panicUnimplemented();
7641 }
7642
7643 Inst_VOP1__V_CVT_F32_F16::Inst_VOP1__V_CVT_F32_F16(InFmt_VOP1 *iFmt)
7644 : Inst_VOP1(iFmt, "v_cvt_f32_f16")
7645 {
7646 setFlag(ALU);
7647 setFlag(F32);
7648 } // Inst_VOP1__V_CVT_F32_F16
7649
7650 Inst_VOP1__V_CVT_F32_F16::~Inst_VOP1__V_CVT_F32_F16()
7651 {
7652 } // ~Inst_VOP1__V_CVT_F32_F16
7653
7654 // D.f = flt16_to_flt32(S0.f16).
7655 void
7656 Inst_VOP1__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
7657 {
7658 panicUnimplemented();
7659 }
7660
7661 Inst_VOP1__V_CVT_RPI_I32_F32::Inst_VOP1__V_CVT_RPI_I32_F32(
7662 InFmt_VOP1 *iFmt)
7663 : Inst_VOP1(iFmt, "v_cvt_rpi_i32_f32")
7664 {
7665 setFlag(ALU);
7666 setFlag(F32);
7667 } // Inst_VOP1__V_CVT_RPI_I32_F32
7668
7669 Inst_VOP1__V_CVT_RPI_I32_F32::~Inst_VOP1__V_CVT_RPI_I32_F32()
7670 {
7671 } // ~Inst_VOP1__V_CVT_RPI_I32_F32
7672
7673 // D.i = (int)floor(S0.f + 0.5).
7674 void
7675 Inst_VOP1__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
7676 {
7677 Wavefront *wf = gpuDynInst->wavefront();
7678 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7679 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7680
7681 src.readSrc();
7682
7683 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7684 if (wf->execMask(lane)) {
7685 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
7686 }
7687 }
7688
7689 vdst.write();
7690 }
7691
7692 Inst_VOP1__V_CVT_FLR_I32_F32::Inst_VOP1__V_CVT_FLR_I32_F32(
7693 InFmt_VOP1 *iFmt)
7694 : Inst_VOP1(iFmt, "v_cvt_flr_i32_f32")
7695 {
7696 setFlag(ALU);
7697 setFlag(F32);
7698 } // Inst_VOP1__V_CVT_FLR_I32_F32
7699
7700 Inst_VOP1__V_CVT_FLR_I32_F32::~Inst_VOP1__V_CVT_FLR_I32_F32()
7701 {
7702 } // ~Inst_VOP1__V_CVT_FLR_I32_F32
7703
7704 // D.i = (int)floor(S0.f).
7705 void
7706 Inst_VOP1__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
7707 {
7708 Wavefront *wf = gpuDynInst->wavefront();
7709 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7710 VecOperandI32 vdst(gpuDynInst, instData.VDST);
7711
7712 src.readSrc();
7713
7714 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7715 if (wf->execMask(lane)) {
7716 vdst[lane] = (VecElemI32)std::floor(src[lane]);
7717 }
7718 }
7719
7720 vdst.write();
7721 }
7722
7723 Inst_VOP1__V_CVT_OFF_F32_I4::Inst_VOP1__V_CVT_OFF_F32_I4(InFmt_VOP1 *iFmt)
7724 : Inst_VOP1(iFmt, "v_cvt_off_f32_i4")
7725 {
7726 setFlag(ALU);
7727 setFlag(F32);
7728 } // Inst_VOP1__V_CVT_OFF_F32_I4
7729
7730 Inst_VOP1__V_CVT_OFF_F32_I4::~Inst_VOP1__V_CVT_OFF_F32_I4()
7731 {
7732 } // ~Inst_VOP1__V_CVT_OFF_F32_I4
7733
7734 // 4-bit signed int to 32-bit float.
7735 void
7736 Inst_VOP1__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
7737 {
7738 panicUnimplemented();
7739 }
7740
7741 Inst_VOP1__V_CVT_F32_F64::Inst_VOP1__V_CVT_F32_F64(InFmt_VOP1 *iFmt)
7742 : Inst_VOP1(iFmt, "v_cvt_f32_f64")
7743 {
7744 setFlag(ALU);
7745 setFlag(F64);
7746 } // Inst_VOP1__V_CVT_F32_F64
7747
7748 Inst_VOP1__V_CVT_F32_F64::~Inst_VOP1__V_CVT_F32_F64()
7749 {
7750 } // ~Inst_VOP1__V_CVT_F32_F64
7751
7752 // D.f = (float)S0.d.
7753 void
7754 Inst_VOP1__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
7755 {
7756 Wavefront *wf = gpuDynInst->wavefront();
7757 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
7758 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7759
7760 src.readSrc();
7761
7762 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7763 if (wf->execMask(lane)) {
7764 vdst[lane] = (VecElemF32)src[lane];
7765 }
7766 }
7767
7768 vdst.write();
7769 }
7770
7771 Inst_VOP1__V_CVT_F64_F32::Inst_VOP1__V_CVT_F64_F32(InFmt_VOP1 *iFmt)
7772 : Inst_VOP1(iFmt, "v_cvt_f64_f32")
7773 {
7774 setFlag(ALU);
7775 setFlag(F64);
7776 } // Inst_VOP1__V_CVT_F64_F32
7777
7778 Inst_VOP1__V_CVT_F64_F32::~Inst_VOP1__V_CVT_F64_F32()
7779 {
7780 } // ~Inst_VOP1__V_CVT_F64_F32
7781
7782 // D.d = (double)S0.f.
7783 void
7784 Inst_VOP1__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
7785 {
7786 Wavefront *wf = gpuDynInst->wavefront();
7787 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
7788 VecOperandF64 vdst(gpuDynInst, instData.VDST);
7789
7790 src.readSrc();
7791
7792 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7793 if (wf->execMask(lane)) {
7794 vdst[lane] = (VecElemF64)src[lane];
7795 }
7796 }
7797
7798 vdst.write();
7799 }
7800
7801 Inst_VOP1__V_CVT_F32_UBYTE0::Inst_VOP1__V_CVT_F32_UBYTE0(InFmt_VOP1 *iFmt)
7802 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte0")
7803 {
7804 setFlag(ALU);
7805 setFlag(F32);
7806 } // Inst_VOP1__V_CVT_F32_UBYTE0
7807
7808 Inst_VOP1__V_CVT_F32_UBYTE0::~Inst_VOP1__V_CVT_F32_UBYTE0()
7809 {
7810 } // ~Inst_VOP1__V_CVT_F32_UBYTE0
7811
7812 // D.f = (float)(S0.u[7:0]).
7813 void
7814 Inst_VOP1__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
7815 {
7816 Wavefront *wf = gpuDynInst->wavefront();
7817 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7818 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7819
7820 src.readSrc();
7821
7822 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7823 if (wf->execMask(lane)) {
7824 vdst[lane] = (VecElemF32)(bits(src[lane], 7, 0));
7825 }
7826 }
7827
7828 vdst.write();
7829 }
7830
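// The four v_cvt_f32_ubyteN opcodes unpack one byte each from a packed
// 32-bit value (e.g. an RGBA8 texel) into floats; a sketch
// parameterized by the byte index n in [0, 3]:
static inline VecElemF32
cvtF32UByteSketch(VecElemU32 packed, int n)
{
    return (VecElemF32)bits(packed, 8 * n + 7, 8 * n);
}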
7831 Inst_VOP1__V_CVT_F32_UBYTE1::Inst_VOP1__V_CVT_F32_UBYTE1(InFmt_VOP1 *iFmt)
7832 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte1")
7833 {
7834 setFlag(ALU);
7835 setFlag(F32);
7836 } // Inst_VOP1__V_CVT_F32_UBYTE1
7837
7838 Inst_VOP1__V_CVT_F32_UBYTE1::~Inst_VOP1__V_CVT_F32_UBYTE1()
7839 {
7840 } // ~Inst_VOP1__V_CVT_F32_UBYTE1
7841
7842 // D.f = (float)(S0.u[15:8]).
7843 void
7844 Inst_VOP1__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
7845 {
7846 Wavefront *wf = gpuDynInst->wavefront();
7847 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7848 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7849
7850 src.readSrc();
7851
7852 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7853 if (wf->execMask(lane)) {
7854 vdst[lane] = (VecElemF32)(bits(src[lane], 15, 8));
7855 }
7856 }
7857
7858 vdst.write();
7859 }
7860
7861 Inst_VOP1__V_CVT_F32_UBYTE2::Inst_VOP1__V_CVT_F32_UBYTE2(InFmt_VOP1 *iFmt)
7862 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte2")
7863 {
7864 setFlag(ALU);
7865 setFlag(F32);
7866 } // Inst_VOP1__V_CVT_F32_UBYTE2
7867
7868 Inst_VOP1__V_CVT_F32_UBYTE2::~Inst_VOP1__V_CVT_F32_UBYTE2()
7869 {
7870 } // ~Inst_VOP1__V_CVT_F32_UBYTE2
7871
7872 // D.f = (float)(S0.u[23:16]).
7873 void
7874 Inst_VOP1__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
7875 {
7876 Wavefront *wf = gpuDynInst->wavefront();
7877 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7878 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7879
7880 src.readSrc();
7881
7882 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7883 if (wf->execMask(lane)) {
7884 vdst[lane] = (VecElemF32)(bits(src[lane], 23, 16));
7885 }
7886 }
7887
7888 vdst.write();
7889 }
7890
7891 Inst_VOP1__V_CVT_F32_UBYTE3::Inst_VOP1__V_CVT_F32_UBYTE3(InFmt_VOP1 *iFmt)
7892 : Inst_VOP1(iFmt, "v_cvt_f32_ubyte3")
7893 {
7894 setFlag(ALU);
7895 setFlag(F32);
7896 } // Inst_VOP1__V_CVT_F32_UBYTE3
7897
7898 Inst_VOP1__V_CVT_F32_UBYTE3::~Inst_VOP1__V_CVT_F32_UBYTE3()
7899 {
7900 } // ~Inst_VOP1__V_CVT_F32_UBYTE3
7901
7902 // D.f = (float)(S0.u[31:24]).
7903 void
7904 Inst_VOP1__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
7905 {
7906 Wavefront *wf = gpuDynInst->wavefront();
7907 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7908 VecOperandF32 vdst(gpuDynInst, instData.VDST);
7909
7910 src.readSrc();
7911
7912 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7913 if (wf->execMask(lane)) {
7914 vdst[lane] = (VecElemF32)(bits(src[lane], 31, 24));
7915 }
7916 }
7917
7918 vdst.write();
7919 }
7920
7921 Inst_VOP1__V_CVT_U32_F64::Inst_VOP1__V_CVT_U32_F64(InFmt_VOP1 *iFmt)
7922 : Inst_VOP1(iFmt, "v_cvt_u32_f64")
7923 {
7924 setFlag(ALU);
7925 setFlag(F64);
7926 } // Inst_VOP1__V_CVT_U32_F64
7927
7928 Inst_VOP1__V_CVT_U32_F64::~Inst_VOP1__V_CVT_U32_F64()
7929 {
7930 } // ~Inst_VOP1__V_CVT_U32_F64
7931
7932 // D.u = (unsigned)S0.d.
7933 // Out-of-range floating point values (including infinity) saturate. NaN
7934 // is converted to 0.
7935 void
7936 Inst_VOP1__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
7937 {
7938 Wavefront *wf = gpuDynInst->wavefront();
7939 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
7940 VecOperandU32 vdst(gpuDynInst, instData.VDST);
7941
7942 src.readSrc();
7943
7944 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7945 if (wf->execMask(lane)) {
7946 int exp;
7947 std::frexp(src[lane], &exp);
7948 if (std::isnan(src[lane])) {
7949 vdst[lane] = 0;
7950 } else if (std::signbit(src[lane])) {
7951 // negative values (including -inf) clamp to 0
7952 vdst[lane] = 0;
7953 } else if (std::isinf(src[lane]) || exp > 32) {
7954 // +inf and finite values >= 2^32 saturate
7955 vdst[lane] = UINT_MAX;
7956 } else {
7957 vdst[lane] = (VecElemU32)src[lane];
7958 }
7961 }
7962 }
7963
7964 vdst.write();
7965 }
7966
7967 Inst_VOP1__V_CVT_F64_U32::Inst_VOP1__V_CVT_F64_U32(InFmt_VOP1 *iFmt)
7968 : Inst_VOP1(iFmt, "v_cvt_f64_u32")
7969 {
7970 setFlag(ALU);
7971 setFlag(F64);
7972 } // Inst_VOP1__V_CVT_F64_U32
7973
7974 Inst_VOP1__V_CVT_F64_U32::~Inst_VOP1__V_CVT_F64_U32()
7975 {
7976 } // ~Inst_VOP1__V_CVT_F64_U32
7977
7978 // D.d = (double)S0.u.
7979 void
7980 Inst_VOP1__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
7981 {
7982 Wavefront *wf = gpuDynInst->wavefront();
7983 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
7984 VecOperandF64 vdst(gpuDynInst, instData.VDST);
7985
7986 src.readSrc();
7987
7988 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
7989 if (wf->execMask(lane)) {
7990 vdst[lane] = (VecElemF64)src[lane];
7991 }
7992 }
7993
7994 vdst.write();
7995 }
7996
7997 Inst_VOP1__V_TRUNC_F64::Inst_VOP1__V_TRUNC_F64(InFmt_VOP1 *iFmt)
7998 : Inst_VOP1(iFmt, "v_trunc_f64")
7999 {
8000 setFlag(ALU);
8001 setFlag(F64);
8002 } // Inst_VOP1__V_TRUNC_F64
8003
8004 Inst_VOP1__V_TRUNC_F64::~Inst_VOP1__V_TRUNC_F64()
8005 {
8006 } // ~Inst_VOP1__V_TRUNC_F64
8007
8008 // D.d = trunc(S0.d), return integer part of S0.d.
8009 void
8010 Inst_VOP1__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
8011 {
8012 Wavefront *wf = gpuDynInst->wavefront();
8013 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8014 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8015
8016 src.readSrc();
8017
8018 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8019 if (wf->execMask(lane)) {
8020 vdst[lane] = std::trunc(src[lane]);
8021 }
8022 }
8023
8024 vdst.write();
8025 }
8026
8027 Inst_VOP1__V_CEIL_F64::Inst_VOP1__V_CEIL_F64(InFmt_VOP1 *iFmt)
8028 : Inst_VOP1(iFmt, "v_ceil_f64")
8029 {
8030 setFlag(ALU);
8031 setFlag(F64);
8032 } // Inst_VOP1__V_CEIL_F64
8033
8034 Inst_VOP1__V_CEIL_F64::~Inst_VOP1__V_CEIL_F64()
8035 {
8036 } // ~Inst_VOP1__V_CEIL_F64
8037
8038 // D.d = ceil(S0.d);
8039 void
8040 Inst_VOP1__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
8041 {
8042 Wavefront *wf = gpuDynInst->wavefront();
8043 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8044 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8045
8046 src.readSrc();
8047
8048 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8049 if (wf->execMask(lane)) {
8050 vdst[lane] = std::ceil(src[lane]);
8051 }
8052 }
8053
8054 vdst.write();
8055 }
8056
8057 Inst_VOP1__V_RNDNE_F64::Inst_VOP1__V_RNDNE_F64(InFmt_VOP1 *iFmt)
8058 : Inst_VOP1(iFmt, "v_rndne_f64")
8059 {
8060 setFlag(ALU);
8061 setFlag(F64);
8062 } // Inst_VOP1__V_RNDNE_F64
8063
8064 Inst_VOP1__V_RNDNE_F64::~Inst_VOP1__V_RNDNE_F64()
8065 {
8066 } // ~Inst_VOP1__V_RNDNE_F64
8067
8068 // D.d = round_nearest_even(S0.d).
8069 void
8070 Inst_VOP1__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
8071 {
8072 Wavefront *wf = gpuDynInst->wavefront();
8073 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8074 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8075
8076 src.readSrc();
8077
8078 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8079 if (wf->execMask(lane)) {
8080 vdst[lane] = roundNearestEven(src[lane]);
8081 }
8082 }
8083
8084 vdst.write();
8085 }
8086
8087 Inst_VOP1__V_FLOOR_F64::Inst_VOP1__V_FLOOR_F64(InFmt_VOP1 *iFmt)
8088 : Inst_VOP1(iFmt, "v_floor_f64")
8089 {
8090 setFlag(ALU);
8091 setFlag(F64);
8092 } // Inst_VOP1__V_FLOOR_F64
8093
8094 Inst_VOP1__V_FLOOR_F64::~Inst_VOP1__V_FLOOR_F64()
8095 {
8096 } // ~Inst_VOP1__V_FLOOR_F64
8097
8098 // D.d = floor(S0.d);
8099 void
8100 Inst_VOP1__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
8101 {
8102 Wavefront *wf = gpuDynInst->wavefront();
8103 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8104 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8105
8106 src.readSrc();
8107
8108 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8109 if (wf->execMask(lane)) {
8110 vdst[lane] = std::floor(src[lane]);
8111 }
8112 }
8113
8114 vdst.write();
8115 }
8116
8117 Inst_VOP1__V_FRACT_F32::Inst_VOP1__V_FRACT_F32(InFmt_VOP1 *iFmt)
8118 : Inst_VOP1(iFmt, "v_fract_f32")
8119 {
8120 setFlag(ALU);
8121 setFlag(F32);
8122 } // Inst_VOP1__V_FRACT_F32
8123
8124 Inst_VOP1__V_FRACT_F32::~Inst_VOP1__V_FRACT_F32()
8125 {
8126 } // ~Inst_VOP1__V_FRACT_F32
8127
8128 // D.f = modf(S0.f).
8129 void
8130 Inst_VOP1__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
8131 {
8132 Wavefront *wf = gpuDynInst->wavefront();
8133 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8134 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8135
8136 src.readSrc();
8137
8138 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8139 if (wf->execMask(lane)) {
8140 VecElemF32 int_part(0.0);
8141 vdst[lane] = std::modf(src[lane], &int_part);
8142 }
8143 }
8144
8145 vdst.write();
8146 }
8147
8148 Inst_VOP1__V_TRUNC_F32::Inst_VOP1__V_TRUNC_F32(InFmt_VOP1 *iFmt)
8149 : Inst_VOP1(iFmt, "v_trunc_f32")
8150 {
8151 setFlag(ALU);
8152 setFlag(F32);
8153 } // Inst_VOP1__V_TRUNC_F32
8154
8155 Inst_VOP1__V_TRUNC_F32::~Inst_VOP1__V_TRUNC_F32()
8156 {
8157 } // ~Inst_VOP1__V_TRUNC_F32
8158
8159 // D.f = trunc(S0.f), return integer part of S0.f.
8160 void
8161 Inst_VOP1__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
8162 {
8163 Wavefront *wf = gpuDynInst->wavefront();
8164 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
        VecOperandF32 vdst(gpuDynInst, instData.VDST);
8166
8167 src.readSrc();
8168
8169 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8170 if (wf->execMask(lane)) {
8171 vdst[lane] = std::trunc(src[lane]);
8172 }
8173 }
8174
8175 vdst.write();
8176 }
8177
8178 Inst_VOP1__V_CEIL_F32::Inst_VOP1__V_CEIL_F32(InFmt_VOP1 *iFmt)
8179 : Inst_VOP1(iFmt, "v_ceil_f32")
8180 {
8181 setFlag(ALU);
8182 setFlag(F32);
8183 } // Inst_VOP1__V_CEIL_F32
8184
8185 Inst_VOP1__V_CEIL_F32::~Inst_VOP1__V_CEIL_F32()
8186 {
8187 } // ~Inst_VOP1__V_CEIL_F32
8188
8189 // D.f = ceil(S0.f);
8190 void
8191 Inst_VOP1__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
8192 {
8193 Wavefront *wf = gpuDynInst->wavefront();
8194 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8195 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8196
8197 src.readSrc();
8198
8199 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8200 if (wf->execMask(lane)) {
8201 vdst[lane] = std::ceil(src[lane]);
8202 }
8203 }
8204
8205 vdst.write();
8206 }
8207
8208 Inst_VOP1__V_RNDNE_F32::Inst_VOP1__V_RNDNE_F32(InFmt_VOP1 *iFmt)
8209 : Inst_VOP1(iFmt, "v_rndne_f32")
8210 {
8211 setFlag(ALU);
8212 setFlag(F32);
8213 } // Inst_VOP1__V_RNDNE_F32
8214
8215 Inst_VOP1__V_RNDNE_F32::~Inst_VOP1__V_RNDNE_F32()
8216 {
8217 } // ~Inst_VOP1__V_RNDNE_F32
8218
8219 // D.f = round_nearest_even(S0.f).
8220 void
8221 Inst_VOP1__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
8222 {
8223 Wavefront *wf = gpuDynInst->wavefront();
8224 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8225 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8226
8227 src.readSrc();
8228
8229 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8230 if (wf->execMask(lane)) {
8231 vdst[lane] = roundNearestEven(src[lane]);
8232 }
8233 }
8234
8235 vdst.write();
8236 }
8237
8238 Inst_VOP1__V_FLOOR_F32::Inst_VOP1__V_FLOOR_F32(InFmt_VOP1 *iFmt)
8239 : Inst_VOP1(iFmt, "v_floor_f32")
8240 {
8241 setFlag(ALU);
8242 setFlag(F32);
8243 } // Inst_VOP1__V_FLOOR_F32
8244
8245 Inst_VOP1__V_FLOOR_F32::~Inst_VOP1__V_FLOOR_F32()
8246 {
8247 } // ~Inst_VOP1__V_FLOOR_F32
8248
8249 // D.f = floor(S0.f);
8250 void
8251 Inst_VOP1__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
8252 {
8253 Wavefront *wf = gpuDynInst->wavefront();
8254 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8255 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8256
8257 src.readSrc();
8258
8259 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8260 if (wf->execMask(lane)) {
8261 vdst[lane] = std::floor(src[lane]);
8262 }
8263 }
8264
8265 vdst.write();
8266 }
8267
8268 Inst_VOP1__V_EXP_F32::Inst_VOP1__V_EXP_F32(InFmt_VOP1 *iFmt)
8269 : Inst_VOP1(iFmt, "v_exp_f32")
8270 {
8271 setFlag(ALU);
8272 setFlag(F32);
8273 } // Inst_VOP1__V_EXP_F32
8274
8275 Inst_VOP1__V_EXP_F32::~Inst_VOP1__V_EXP_F32()
8276 {
8277 } // ~Inst_VOP1__V_EXP_F32
8278
8279 // D.f = pow(2.0, S0.f).
8280 void
8281 Inst_VOP1__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
8282 {
8283 Wavefront *wf = gpuDynInst->wavefront();
8284 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8285 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8286
8287 src.readSrc();
8288
8289 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8290 if (wf->execMask(lane)) {
8291 vdst[lane] = std::pow(2.0, src[lane]);
8292 }
8293 }
8294
8295 vdst.write();
8296 }
8297
8298 Inst_VOP1__V_LOG_F32::Inst_VOP1__V_LOG_F32(InFmt_VOP1 *iFmt)
8299 : Inst_VOP1(iFmt, "v_log_f32")
8300 {
8301 setFlag(ALU);
8302 setFlag(F32);
8303 } // Inst_VOP1__V_LOG_F32
8304
8305 Inst_VOP1__V_LOG_F32::~Inst_VOP1__V_LOG_F32()
8306 {
8307 } // ~Inst_VOP1__V_LOG_F32
8308
8309 // D.f = log2(S0.f).
8310 void
8311 Inst_VOP1__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
8312 {
8313 Wavefront *wf = gpuDynInst->wavefront();
8314 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8315 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8316
8317 src.readSrc();
8318
8319 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8320 if (wf->execMask(lane)) {
8321 vdst[lane] = std::log2(src[lane]);
8322 }
8323 }
8324
8325 vdst.write();
8326 }
8327
8328 Inst_VOP1__V_RCP_F32::Inst_VOP1__V_RCP_F32(InFmt_VOP1 *iFmt)
8329 : Inst_VOP1(iFmt, "v_rcp_f32")
8330 {
8331 setFlag(ALU);
8332 setFlag(F32);
8333 } // Inst_VOP1__V_RCP_F32
8334
8335 Inst_VOP1__V_RCP_F32::~Inst_VOP1__V_RCP_F32()
8336 {
8337 } // ~Inst_VOP1__V_RCP_F32
8338
8339 // D.f = 1.0 / S0.f.
8340 void
8341 Inst_VOP1__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
8342 {
8343 Wavefront *wf = gpuDynInst->wavefront();
8344 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8345 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8346
8347 src.readSrc();
8348
8349 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8350 if (wf->execMask(lane)) {
8351 vdst[lane] = 1.0 / src[lane];
8352 }
8353 }
8354
8355 vdst.write();
8356 }
8357
8358 Inst_VOP1__V_RCP_IFLAG_F32::Inst_VOP1__V_RCP_IFLAG_F32(InFmt_VOP1 *iFmt)
8359 : Inst_VOP1(iFmt, "v_rcp_iflag_f32")
8360 {
8361 setFlag(ALU);
8362 setFlag(F32);
8363 } // Inst_VOP1__V_RCP_IFLAG_F32
8364
8365 Inst_VOP1__V_RCP_IFLAG_F32::~Inst_VOP1__V_RCP_IFLAG_F32()
8366 {
8367 } // ~Inst_VOP1__V_RCP_IFLAG_F32
8368
8369 // D.f = 1.0 / S0.f.
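    // The IFLAG variant is meant for the integer-division macro: on real
    // hardware it raises the integer divide-by-zero exception rather
    // than FP exceptions. Exceptions are not modeled here, so the
    // arithmetic matches v_rcp_f32.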
8370 void
8371 Inst_VOP1__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
8372 {
8373 Wavefront *wf = gpuDynInst->wavefront();
8374 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8375 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8376
8377 src.readSrc();
8378
8379 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8380 if (wf->execMask(lane)) {
8381 vdst[lane] = 1.0 / src[lane];
8382 }
8383 }
8384
8385 vdst.write();
8386 }
8387
8388 Inst_VOP1__V_RSQ_F32::Inst_VOP1__V_RSQ_F32(InFmt_VOP1 *iFmt)
8389 : Inst_VOP1(iFmt, "v_rsq_f32")
8390 {
8391 setFlag(ALU);
8392 setFlag(F32);
8393 } // Inst_VOP1__V_RSQ_F32
8394
8395 Inst_VOP1__V_RSQ_F32::~Inst_VOP1__V_RSQ_F32()
8396 {
8397 } // ~Inst_VOP1__V_RSQ_F32
8398
8399 // D.f = 1.0 / sqrt(S0.f).
8400 void
8401 Inst_VOP1__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
8402 {
8403 Wavefront *wf = gpuDynInst->wavefront();
8404 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8405 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8406
8407 src.readSrc();
8408
8409 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8410 if (wf->execMask(lane)) {
8411 vdst[lane] = 1.0 / std::sqrt(src[lane]);
8412 }
8413 }
8414
8415 vdst.write();
8416 }
8417
8418 Inst_VOP1__V_RCP_F64::Inst_VOP1__V_RCP_F64(InFmt_VOP1 *iFmt)
8419 : Inst_VOP1(iFmt, "v_rcp_f64")
8420 {
8421 setFlag(ALU);
8422 setFlag(F64);
8423 } // Inst_VOP1__V_RCP_F64
8424
8425 Inst_VOP1__V_RCP_F64::~Inst_VOP1__V_RCP_F64()
8426 {
8427 } // ~Inst_VOP1__V_RCP_F64
8428
8429 // D.d = 1.0 / S0.d.
8430 void
8431 Inst_VOP1__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
8432 {
8433 Wavefront *wf = gpuDynInst->wavefront();
8434 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8435 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8436
8437 src.readSrc();
8438
8439 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8440 if (wf->execMask(lane)) {
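                // Handle the IEEE special cases explicitly: a zero of
                // either sign gives +INF, NaN propagates, and +-INF
                // gives a zero of the same sign.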
8441 if (std::fpclassify(src[lane]) == FP_ZERO) {
8442 vdst[lane] = +INFINITY;
8443 } else if (std::isnan(src[lane])) {
8444 vdst[lane] = NAN;
8445 } else if (std::isinf(src[lane])) {
8446 if (std::signbit(src[lane])) {
8447 vdst[lane] = -0.0;
8448 } else {
8449 vdst[lane] = 0.0;
8450 }
8451 } else {
8452 vdst[lane] = 1.0 / src[lane];
8453 }
8454 }
8455 }
8456
8457 vdst.write();
8458 }
8459
8460 Inst_VOP1__V_RSQ_F64::Inst_VOP1__V_RSQ_F64(InFmt_VOP1 *iFmt)
8461 : Inst_VOP1(iFmt, "v_rsq_f64")
8462 {
8463 setFlag(ALU);
8464 setFlag(F64);
8465 } // Inst_VOP1__V_RSQ_F64
8466
8467 Inst_VOP1__V_RSQ_F64::~Inst_VOP1__V_RSQ_F64()
8468 {
8469 } // ~Inst_VOP1__V_RSQ_F64
8470
8471 // D.d = 1.0 / sqrt(S0.d).
8472 void
8473 Inst_VOP1__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
8474 {
8475 Wavefront *wf = gpuDynInst->wavefront();
8476 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8477 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8478
8479 src.readSrc();
8480
8481 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8482 if (wf->execMask(lane)) {
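                // Special cases: a zero of either sign gives +INF, NaN
                // propagates, +INF gives +0, and any other negative
                // input (including -INF) gives NaN.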
8483 if (std::fpclassify(src[lane]) == FP_ZERO) {
8484 vdst[lane] = +INFINITY;
8485 } else if (std::isnan(src[lane])) {
8486 vdst[lane] = NAN;
8487 } else if (std::isinf(src[lane])
8488 && !std::signbit(src[lane])) {
8489 vdst[lane] = 0.0;
8490 } else if (std::signbit(src[lane])) {
8491 vdst[lane] = NAN;
8492 } else {
8493 vdst[lane] = 1.0 / std::sqrt(src[lane]);
8494 }
8495 }
8496 }
8497
8498 vdst.write();
8499 }
8500
8501 Inst_VOP1__V_SQRT_F32::Inst_VOP1__V_SQRT_F32(InFmt_VOP1 *iFmt)
8502 : Inst_VOP1(iFmt, "v_sqrt_f32")
8503 {
8504 setFlag(ALU);
8505 setFlag(F32);
8506 } // Inst_VOP1__V_SQRT_F32
8507
8508 Inst_VOP1__V_SQRT_F32::~Inst_VOP1__V_SQRT_F32()
8509 {
8510 } // ~Inst_VOP1__V_SQRT_F32
8511
8512 // D.f = sqrt(S0.f).
8513 void
8514 Inst_VOP1__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
8515 {
8516 Wavefront *wf = gpuDynInst->wavefront();
8517 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8518 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8519
8520 src.readSrc();
8521
8522 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8523 if (wf->execMask(lane)) {
8524 vdst[lane] = std::sqrt(src[lane]);
8525 }
8526 }
8527
8528 vdst.write();
8529 }
8530
8531 Inst_VOP1__V_SQRT_F64::Inst_VOP1__V_SQRT_F64(InFmt_VOP1 *iFmt)
8532 : Inst_VOP1(iFmt, "v_sqrt_f64")
8533 {
8534 setFlag(ALU);
8535 setFlag(F64);
8536 } // Inst_VOP1__V_SQRT_F64
8537
8538 Inst_VOP1__V_SQRT_F64::~Inst_VOP1__V_SQRT_F64()
8539 {
8540 } // ~Inst_VOP1__V_SQRT_F64
8541
8542 // D.d = sqrt(S0.d).
8543 void
8544 Inst_VOP1__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
8545 {
8546 Wavefront *wf = gpuDynInst->wavefront();
8547 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8548 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8549
8550 src.readSrc();
8551
8552 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8553 if (wf->execMask(lane)) {
8554 vdst[lane] = std::sqrt(src[lane]);
8555 }
8556 }
8557
8558 vdst.write();
8559 }
8560
8561 Inst_VOP1__V_SIN_F32::Inst_VOP1__V_SIN_F32(InFmt_VOP1 *iFmt)
8562 : Inst_VOP1(iFmt, "v_sin_f32")
8563 {
8564 setFlag(ALU);
8565 setFlag(F32);
8566 } // Inst_VOP1__V_SIN_F32
8567
8568 Inst_VOP1__V_SIN_F32::~Inst_VOP1__V_SIN_F32()
8569 {
8570 } // ~Inst_VOP1__V_SIN_F32
8571
8572 // D.f = sin(S0.f * 2 * PI).
8573 void
8574 Inst_VOP1__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
8575 {
8576 Wavefront *wf = gpuDynInst->wavefront();
8577 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8578 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
8579 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8580
8581 src.readSrc();
8582 pi.read();
8583
8584 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8585 if (wf->execMask(lane)) {
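                // The valid input domain is [-256, +256] revolutions;
                // out-of-range inputs produce 0.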
8586 if (src[lane] < -256.0 || src[lane] > 256.0) {
8587 vdst[lane] = 0.0;
8588 } else {
8589 vdst[lane] = std::sin(src[lane] * 2.0 * pi.rawData());
8590 }
8591 }
8592 }
8593
8594 vdst.write();
8595 }
8596
8597 Inst_VOP1__V_COS_F32::Inst_VOP1__V_COS_F32(InFmt_VOP1 *iFmt)
8598 : Inst_VOP1(iFmt, "v_cos_f32")
8599 {
8600 setFlag(ALU);
8601 setFlag(F32);
8602 } // Inst_VOP1__V_COS_F32
8603
8604 Inst_VOP1__V_COS_F32::~Inst_VOP1__V_COS_F32()
8605 {
8606 } // ~Inst_VOP1__V_COS_F32
8607
8608 // D.f = cos(S0.f * 2 * PI).
8609 void
8610 Inst_VOP1__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
8611 {
8612 Wavefront *wf = gpuDynInst->wavefront();
8613 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8614 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
8615 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8616
8617 src.readSrc();
8618 pi.read();
8619
8620 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8621 if (wf->execMask(lane)) {
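                // The valid input domain is [-256, +256] revolutions;
                // out-of-range inputs produce 0.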
8622 if (src[lane] < -256.0 || src[lane] > 256.0) {
8623 vdst[lane] = 0.0;
8624 } else {
8625 vdst[lane] = std::cos(src[lane] * 2.0 * pi.rawData());
8626 }
8627 }
8628 }
8629
8630 vdst.write();
8631 }
8632
8633 Inst_VOP1__V_NOT_B32::Inst_VOP1__V_NOT_B32(InFmt_VOP1 *iFmt)
8634 : Inst_VOP1(iFmt, "v_not_b32")
8635 {
8636 setFlag(ALU);
8637 } // Inst_VOP1__V_NOT_B32
8638
8639 Inst_VOP1__V_NOT_B32::~Inst_VOP1__V_NOT_B32()
8640 {
8641 } // ~Inst_VOP1__V_NOT_B32
8642
8643 // D.u = ~S0.u.
8644 // Input and output modifiers not supported.
8645 void
8646 Inst_VOP1__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
8647 {
8648 Wavefront *wf = gpuDynInst->wavefront();
8649 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8650 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8651
8652 src.readSrc();
8653
8654 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8655 if (wf->execMask(lane)) {
8656 vdst[lane] = ~src[lane];
8657 }
8658 }
8659
8660 vdst.write();
8661 }
8662
8663 Inst_VOP1__V_BFREV_B32::Inst_VOP1__V_BFREV_B32(InFmt_VOP1 *iFmt)
8664 : Inst_VOP1(iFmt, "v_bfrev_b32")
8665 {
8666 setFlag(ALU);
8667 } // Inst_VOP1__V_BFREV_B32
8668
8669 Inst_VOP1__V_BFREV_B32::~Inst_VOP1__V_BFREV_B32()
8670 {
8671 } // ~Inst_VOP1__V_BFREV_B32
8672
8673 // D.u[31:0] = S0.u[0:31], bitfield reverse.
8674 // Input and output modifiers not supported.
8675 void
8676 Inst_VOP1__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
8677 {
8678 Wavefront *wf = gpuDynInst->wavefront();
8679 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8680 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8681
8682 src.readSrc();
8683
8684 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8685 if (wf->execMask(lane)) {
8686 vdst[lane] = reverseBits(src[lane]);
8687 }
8688 }
8689
8690 vdst.write();
8691 }
8692
8693 Inst_VOP1__V_FFBH_U32::Inst_VOP1__V_FFBH_U32(InFmt_VOP1 *iFmt)
8694 : Inst_VOP1(iFmt, "v_ffbh_u32")
8695 {
8696 setFlag(ALU);
8697 } // Inst_VOP1__V_FFBH_U32
8698
8699 Inst_VOP1__V_FFBH_U32::~Inst_VOP1__V_FFBH_U32()
8700 {
8701 } // ~Inst_VOP1__V_FFBH_U32
8702
8703 // D.u = position of first 1 in S0.u from MSB;
8704 // D.u = 0xffffffff if S0.u == 0.
8705 void
8706 Inst_VOP1__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
8707 {
8708 Wavefront *wf = gpuDynInst->wavefront();
8709 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8710 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8711
8712 src.readSrc();
8713
8714 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8715 if (wf->execMask(lane)) {
8716 vdst[lane] = findFirstOneMsb(src[lane]);
8717 }
8718 }
8719
8720 vdst.write();
8721 }
8722
8723 Inst_VOP1__V_FFBL_B32::Inst_VOP1__V_FFBL_B32(InFmt_VOP1 *iFmt)
8724 : Inst_VOP1(iFmt, "v_ffbl_b32")
8725 {
8726 setFlag(ALU);
8727 } // Inst_VOP1__V_FFBL_B32
8728
8729 Inst_VOP1__V_FFBL_B32::~Inst_VOP1__V_FFBL_B32()
8730 {
8731 } // ~Inst_VOP1__V_FFBL_B32
8732
8733 // D.u = position of first 1 in S0.u from LSB;
8734 // D.u = 0xffffffff if S0.u == 0.
8735 void
8736 Inst_VOP1__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
8737 {
8738 Wavefront *wf = gpuDynInst->wavefront();
8739 ConstVecOperandU32 src(gpuDynInst, instData.SRC0);
8740 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8741
8742 src.readSrc();
8743
8744 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8745 if (wf->execMask(lane)) {
8746 vdst[lane] = findFirstOne(src[lane]);
8747 }
8748 }
8749
8750 vdst.write();
8751 }
8752
8753 Inst_VOP1__V_FFBH_I32::Inst_VOP1__V_FFBH_I32(InFmt_VOP1 *iFmt)
8754 : Inst_VOP1(iFmt, "v_ffbh_i32")
8755 {
8756 setFlag(ALU);
8757 } // Inst_VOP1__V_FFBH_I32
8758
8759 Inst_VOP1__V_FFBH_I32::~Inst_VOP1__V_FFBH_I32()
8760 {
8761 } // ~Inst_VOP1__V_FFBH_I32
8762
8763 // D.u = position of first bit different from sign bit in S0.i from MSB;
8764 // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
8765 void
8766 Inst_VOP1__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
8767 {
8768 Wavefront *wf = gpuDynInst->wavefront();
8769 ConstVecOperandI32 src(gpuDynInst, instData.SRC0);
8770 VecOperandU32 vdst(gpuDynInst, instData.VDST);
8771
8772 src.readSrc();
8773
8774 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8775 if (wf->execMask(lane)) {
8776 vdst[lane] = firstOppositeSignBit(src[lane]);
8777 }
8778 }
8779
8780 vdst.write();
8781 }
8782
8783 Inst_VOP1__V_FREXP_EXP_I32_F64::Inst_VOP1__V_FREXP_EXP_I32_F64(
8784 InFmt_VOP1 *iFmt)
8785 : Inst_VOP1(iFmt, "v_frexp_exp_i32_f64")
8786 {
8787 setFlag(ALU);
8788 setFlag(F64);
8789 } // Inst_VOP1__V_FREXP_EXP_I32_F64
8790
8791 Inst_VOP1__V_FREXP_EXP_I32_F64::~Inst_VOP1__V_FREXP_EXP_I32_F64()
8792 {
8793 } // ~Inst_VOP1__V_FREXP_EXP_I32_F64
8794
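    // frexp(S0.d, Exponent(S0.d))
    // if (S0.d == INF || S0.d == NAN) then D.i = 0;
    // else D.i = Exponent(S0.d);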
8795 void
8796 Inst_VOP1__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
8797 {
8798 Wavefront *wf = gpuDynInst->wavefront();
8799 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8800 VecOperandI32 vdst(gpuDynInst, instData.VDST);
8801
8802 src.readSrc();
8803
8804 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8805 if (wf->execMask(lane)) {
8806 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8807 vdst[lane] = 0;
8808 } else {
8809 VecElemI32 exp = 0;
8810 std::frexp(src[lane], &exp);
8811 vdst[lane] = exp;
8812 }
8813 }
8814 }
8815
8816 vdst.write();
8817 }
8818
8819 Inst_VOP1__V_FREXP_MANT_F64::Inst_VOP1__V_FREXP_MANT_F64(InFmt_VOP1 *iFmt)
8820 : Inst_VOP1(iFmt, "v_frexp_mant_f64")
8821 {
8822 setFlag(ALU);
8823 setFlag(F64);
8824 } // Inst_VOP1__V_FREXP_MANT_F64
8825
8826 Inst_VOP1__V_FREXP_MANT_F64::~Inst_VOP1__V_FREXP_MANT_F64()
8827 {
8828 } // ~Inst_VOP1__V_FREXP_MANT_F64
8829
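    // if (S0.d == INF || S0.d == NAN) then D.d = S0.d;
    // else D.d = frexp(S0.d, Exponent(S0.d)).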
8830 void
8831 Inst_VOP1__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
8832 {
8833 Wavefront *wf = gpuDynInst->wavefront();
8834 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8835 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8836
8837 src.readSrc();
8838
8839 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8840 if (wf->execMask(lane)) {
8841 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8842 vdst[lane] = src[lane];
8843 } else {
8844 VecElemI32 exp(0);
8845 vdst[lane] = std::frexp(src[lane], &exp);
8846 }
8847 }
8848 }
8849
8850 vdst.write();
8851 }
8852
8853 Inst_VOP1__V_FRACT_F64::Inst_VOP1__V_FRACT_F64(InFmt_VOP1 *iFmt)
8854 : Inst_VOP1(iFmt, "v_fract_f64")
8855 {
8856 setFlag(ALU);
8857 setFlag(F64);
8858 } // Inst_VOP1__V_FRACT_F64
8859
8860 Inst_VOP1__V_FRACT_F64::~Inst_VOP1__V_FRACT_F64()
8861 {
8862 } // ~Inst_VOP1__V_FRACT_F64
8863
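    // D.d = modf(S0.d).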
8864 void
8865 Inst_VOP1__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
8866 {
8867 Wavefront *wf = gpuDynInst->wavefront();
8868 ConstVecOperandF64 src(gpuDynInst, instData.SRC0);
8869 VecOperandF64 vdst(gpuDynInst, instData.VDST);
8870
8871 src.readSrc();
8872
8873 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8874 if (wf->execMask(lane)) {
8875 VecElemF64 int_part(0.0);
8876 vdst[lane] = std::modf(src[lane], &int_part);
8877 }
8878 }
8879
8880 vdst.write();
8881 }
8882
8883 Inst_VOP1__V_FREXP_EXP_I32_F32::Inst_VOP1__V_FREXP_EXP_I32_F32(
8884 InFmt_VOP1 *iFmt)
8885 : Inst_VOP1(iFmt, "v_frexp_exp_i32_f32")
8886 {
8887 setFlag(ALU);
8888 setFlag(F32);
8889 } // Inst_VOP1__V_FREXP_EXP_I32_F32
8890
8891 Inst_VOP1__V_FREXP_EXP_I32_F32::~Inst_VOP1__V_FREXP_EXP_I32_F32()
8892 {
8893 } // ~Inst_VOP1__V_FREXP_EXP_I32_F32
8894
8895 // frexp(S0.f, Exponent(S0.f))
8896 // if (S0.f == INF || S0.f == NAN) then D.i = 0;
8897 // else D.i = Exponent(S0.f);
8898 void
8899 Inst_VOP1__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
8900 {
8901 Wavefront *wf = gpuDynInst->wavefront();
8902 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8903 VecOperandI32 vdst(gpuDynInst, instData.VDST);
8904
8905 src.readSrc();
8906
8907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8908 if (wf->execMask(lane)) {
8909 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8910 vdst[lane] = 0;
8911 } else {
8912 VecElemI32 exp(0);
8913 std::frexp(src[lane], &exp);
8914 vdst[lane] = exp;
8915 }
8916 }
8917 }
8918
8919 vdst.write();
8920 }
8921
8922 Inst_VOP1__V_FREXP_MANT_F32::Inst_VOP1__V_FREXP_MANT_F32(InFmt_VOP1 *iFmt)
8923 : Inst_VOP1(iFmt, "v_frexp_mant_f32")
8924 {
8925 setFlag(ALU);
8926 setFlag(F32);
8927 } // Inst_VOP1__V_FREXP_MANT_F32
8928
8929 Inst_VOP1__V_FREXP_MANT_F32::~Inst_VOP1__V_FREXP_MANT_F32()
8930 {
8931 } // ~Inst_VOP1__V_FREXP_MANT_F32
8932
8933 // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
8934 // else D.f = frexp(S0.f, Exponent(S0.f)).
8935 void
8936 Inst_VOP1__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
8937 {
8938 Wavefront *wf = gpuDynInst->wavefront();
8939 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
8940 VecOperandF32 vdst(gpuDynInst, instData.VDST);
8941
8942 src.readSrc();
8943
8944 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
8945 if (wf->execMask(lane)) {
8946 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
8947 vdst[lane] = src[lane];
8948 } else {
8949 VecElemI32 exp(0);
8950 vdst[lane] = std::frexp(src[lane], &exp);
8951 }
8952 }
8953 }
8954
8955 vdst.write();
8956 }
8957
8958 Inst_VOP1__V_CLREXCP::Inst_VOP1__V_CLREXCP(InFmt_VOP1 *iFmt)
8959 : Inst_VOP1(iFmt, "v_clrexcp")
8960 {
8961 setFlag(ALU);
8962 } // Inst_VOP1__V_CLREXCP
8963
8964 Inst_VOP1__V_CLREXCP::~Inst_VOP1__V_CLREXCP()
8965 {
8966 } // ~Inst_VOP1__V_CLREXCP
8967
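    // Clear the wave's floating-point exception state.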
8968 void
8969 Inst_VOP1__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
8970 {
8971 panicUnimplemented();
8972 }
8973
8974 Inst_VOP1__V_CVT_F16_U16::Inst_VOP1__V_CVT_F16_U16(InFmt_VOP1 *iFmt)
8975 : Inst_VOP1(iFmt, "v_cvt_f16_u16")
8976 {
8977 setFlag(ALU);
8978 setFlag(F16);
8979 } // Inst_VOP1__V_CVT_F16_U16
8980
8981 Inst_VOP1__V_CVT_F16_U16::~Inst_VOP1__V_CVT_F16_U16()
8982 {
8983 } // ~Inst_VOP1__V_CVT_F16_U16
8984
8985 // D.f16 = uint16_to_flt16(S.u16).
8986 void
8987 Inst_VOP1__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
8988 {
8989 panicUnimplemented();
8990 }
8991
8992 Inst_VOP1__V_CVT_F16_I16::Inst_VOP1__V_CVT_F16_I16(InFmt_VOP1 *iFmt)
8993 : Inst_VOP1(iFmt, "v_cvt_f16_i16")
8994 {
8995 setFlag(ALU);
8996 setFlag(F16);
8997 } // Inst_VOP1__V_CVT_F16_I16
8998
8999 Inst_VOP1__V_CVT_F16_I16::~Inst_VOP1__V_CVT_F16_I16()
9000 {
9001 } // ~Inst_VOP1__V_CVT_F16_I16
9002
9003 // D.f16 = int16_to_flt16(S.i16).
9004 void
9005 Inst_VOP1__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
9006 {
9007 panicUnimplemented();
9008 }
9009
9010 Inst_VOP1__V_CVT_U16_F16::Inst_VOP1__V_CVT_U16_F16(InFmt_VOP1 *iFmt)
9011 : Inst_VOP1(iFmt, "v_cvt_u16_f16")
9012 {
9013 setFlag(ALU);
9014 setFlag(F16);
9015 } // Inst_VOP1__V_CVT_U16_F16
9016
9017 Inst_VOP1__V_CVT_U16_F16::~Inst_VOP1__V_CVT_U16_F16()
9018 {
9019 } // ~Inst_VOP1__V_CVT_U16_F16
9020
9021 // D.u16 = flt16_to_uint16(S.f16).
9022 void
9023 Inst_VOP1__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
9024 {
9025 panicUnimplemented();
9026 }
9027
9028 Inst_VOP1__V_CVT_I16_F16::Inst_VOP1__V_CVT_I16_F16(InFmt_VOP1 *iFmt)
9029 : Inst_VOP1(iFmt, "v_cvt_i16_f16")
9030 {
9031 setFlag(ALU);
9032 setFlag(F16);
9033 } // Inst_VOP1__V_CVT_I16_F16
9034
9035 Inst_VOP1__V_CVT_I16_F16::~Inst_VOP1__V_CVT_I16_F16()
9036 {
9037 } // ~Inst_VOP1__V_CVT_I16_F16
9038
9039 // D.i16 = flt16_to_int16(S.f16).
9040 void
9041 Inst_VOP1__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
9042 {
9043 panicUnimplemented();
9044 }
9045
9046 Inst_VOP1__V_RCP_F16::Inst_VOP1__V_RCP_F16(InFmt_VOP1 *iFmt)
9047 : Inst_VOP1(iFmt, "v_rcp_f16")
9048 {
9049 setFlag(ALU);
9050 setFlag(F16);
9051 } // Inst_VOP1__V_RCP_F16
9052
9053 Inst_VOP1__V_RCP_F16::~Inst_VOP1__V_RCP_F16()
9054 {
9055 } // ~Inst_VOP1__V_RCP_F16
9056
9057 // if (S0.f16 == 1.0f)
9058 // D.f16 = 1.0f;
9059 // else
9060 // D.f16 = 1 / S0.f16;
9061 void
9062 Inst_VOP1__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
9063 {
9064 panicUnimplemented();
9065 }
9066
9067 Inst_VOP1__V_SQRT_F16::Inst_VOP1__V_SQRT_F16(InFmt_VOP1 *iFmt)
9068 : Inst_VOP1(iFmt, "v_sqrt_f16")
9069 {
9070 setFlag(ALU);
9071 setFlag(F16);
9072 } // Inst_VOP1__V_SQRT_F16
9073
9074 Inst_VOP1__V_SQRT_F16::~Inst_VOP1__V_SQRT_F16()
9075 {
9076 } // ~Inst_VOP1__V_SQRT_F16
9077
9078 // if (S0.f16 == 1.0f)
9079 // D.f16 = 1.0f;
9080 // else
9081 // D.f16 = sqrt(S0.f16);
9082 void
9083 Inst_VOP1__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
9084 {
9085 panicUnimplemented();
9086 }
9087
9088 Inst_VOP1__V_RSQ_F16::Inst_VOP1__V_RSQ_F16(InFmt_VOP1 *iFmt)
9089 : Inst_VOP1(iFmt, "v_rsq_f16")
9090 {
9091 setFlag(ALU);
9092 setFlag(F16);
9093 } // Inst_VOP1__V_RSQ_F16
9094
9095 Inst_VOP1__V_RSQ_F16::~Inst_VOP1__V_RSQ_F16()
9096 {
9097 } // ~Inst_VOP1__V_RSQ_F16
9098
9099 // if (S0.f16 == 1.0f)
9100 // D.f16 = 1.0f;
9101 // else
9102 // D.f16 = 1 / sqrt(S0.f16);
9103 void
9104 Inst_VOP1__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
9105 {
9106 panicUnimplemented();
9107 }
9108
9109 Inst_VOP1__V_LOG_F16::Inst_VOP1__V_LOG_F16(InFmt_VOP1 *iFmt)
9110 : Inst_VOP1(iFmt, "v_log_f16")
9111 {
9112 setFlag(ALU);
9113 setFlag(F16);
9114 } // Inst_VOP1__V_LOG_F16
9115
9116 Inst_VOP1__V_LOG_F16::~Inst_VOP1__V_LOG_F16()
9117 {
9118 } // ~Inst_VOP1__V_LOG_F16
9119
9120 // if (S0.f16 == 1.0f)
9121 // D.f16 = 0.0f;
9122 // else
9123 // D.f16 = log2(S0.f16);
9124 void
9125 Inst_VOP1__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
9126 {
9127 panicUnimplemented();
9128 }
9129
9130 Inst_VOP1__V_EXP_F16::Inst_VOP1__V_EXP_F16(InFmt_VOP1 *iFmt)
9131 : Inst_VOP1(iFmt, "v_exp_f16")
9132 {
9133 setFlag(ALU);
9134 setFlag(F16);
9135 } // Inst_VOP1__V_EXP_F16
9136
9137 Inst_VOP1__V_EXP_F16::~Inst_VOP1__V_EXP_F16()
9138 {
9139 } // ~Inst_VOP1__V_EXP_F16
9140
9141 // if (S0.f16 == 0.0f)
9142 // D.f16 = 1.0f;
9143 // else
9144 // D.f16 = pow(2.0, S0.f16).
9145 void
9146 Inst_VOP1__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
9147 {
9148 panicUnimplemented();
9149 }
9150
9151 Inst_VOP1__V_FREXP_MANT_F16::Inst_VOP1__V_FREXP_MANT_F16(InFmt_VOP1 *iFmt)
9152 : Inst_VOP1(iFmt, "v_frexp_mant_f16")
9153 {
9154 setFlag(ALU);
9155 setFlag(F16);
9156 } // Inst_VOP1__V_FREXP_MANT_F16
9157
9158 Inst_VOP1__V_FREXP_MANT_F16::~Inst_VOP1__V_FREXP_MANT_F16()
9159 {
9160 } // ~Inst_VOP1__V_FREXP_MANT_F16
9161
9162 // if (S0.f16 == +-INF || S0.f16 == NAN)
9163 // D.f16 = S0.f16;
9164 // else
9165 // D.f16 = mantissa(S0.f16).
9166 void
9167 Inst_VOP1__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
9168 {
9169 panicUnimplemented();
9170 }
9171
9172 Inst_VOP1__V_FREXP_EXP_I16_F16::Inst_VOP1__V_FREXP_EXP_I16_F16(
9173 InFmt_VOP1 *iFmt)
9174 : Inst_VOP1(iFmt, "v_frexp_exp_i16_f16")
9175 {
9176 setFlag(ALU);
9177 setFlag(F16);
9178 } // Inst_VOP1__V_FREXP_EXP_I16_F16
9179
9180 Inst_VOP1__V_FREXP_EXP_I16_F16::~Inst_VOP1__V_FREXP_EXP_I16_F16()
9181 {
9182 } // ~Inst_VOP1__V_FREXP_EXP_I16_F16
9183
9184 // frexp(S0.f16, Exponent(S0.f16))
9185 // if (S0.f16 == +-INF || S0.f16 == NAN)
9186 // D.i16 = 0;
9187 // else
9188 // D.i16 = Exponent(S0.f16);
9189 void
9190 Inst_VOP1__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
9191 {
9192 panicUnimplemented();
9193 }
9194
9195 Inst_VOP1__V_FLOOR_F16::Inst_VOP1__V_FLOOR_F16(InFmt_VOP1 *iFmt)
9196 : Inst_VOP1(iFmt, "v_floor_f16")
9197 {
9198 setFlag(ALU);
9199 setFlag(F16);
9200 } // Inst_VOP1__V_FLOOR_F16
9201
9202 Inst_VOP1__V_FLOOR_F16::~Inst_VOP1__V_FLOOR_F16()
9203 {
9204 } // ~Inst_VOP1__V_FLOOR_F16
9205
9206 // D.f16 = floor(S0.f16);
9207 void
9208 Inst_VOP1__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
9209 {
9210 panicUnimplemented();
9211 }
9212
9213 Inst_VOP1__V_CEIL_F16::Inst_VOP1__V_CEIL_F16(InFmt_VOP1 *iFmt)
9214 : Inst_VOP1(iFmt, "v_ceil_f16")
9215 {
9216 setFlag(ALU);
9217 setFlag(F16);
9218 } // Inst_VOP1__V_CEIL_F16
9219
9220 Inst_VOP1__V_CEIL_F16::~Inst_VOP1__V_CEIL_F16()
9221 {
9222 } // ~Inst_VOP1__V_CEIL_F16
9223
9224 // D.f16 = ceil(S0.f16);
9225 void
9226 Inst_VOP1__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
9227 {
9228 panicUnimplemented();
9229 }
9230
9231 Inst_VOP1__V_TRUNC_F16::Inst_VOP1__V_TRUNC_F16(InFmt_VOP1 *iFmt)
9232 : Inst_VOP1(iFmt, "v_trunc_f16")
9233 {
9234 setFlag(ALU);
9235 setFlag(F16);
9236 } // Inst_VOP1__V_TRUNC_F16
9237
9238 Inst_VOP1__V_TRUNC_F16::~Inst_VOP1__V_TRUNC_F16()
9239 {
9240 } // ~Inst_VOP1__V_TRUNC_F16
9241
9242 // D.f16 = trunc(S0.f16).
9243 void
9244 Inst_VOP1__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
9245 {
9246 panicUnimplemented();
9247 }
9248
9249 Inst_VOP1__V_RNDNE_F16::Inst_VOP1__V_RNDNE_F16(InFmt_VOP1 *iFmt)
9250 : Inst_VOP1(iFmt, "v_rndne_f16")
9251 {
9252 setFlag(ALU);
9253 setFlag(F16);
9254 } // Inst_VOP1__V_RNDNE_F16
9255
9256 Inst_VOP1__V_RNDNE_F16::~Inst_VOP1__V_RNDNE_F16()
9257 {
9258 } // ~Inst_VOP1__V_RNDNE_F16
9259
9260 // D.f16 = roundNearestEven(S0.f16);
9261 void
9262 Inst_VOP1__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
9263 {
9264 panicUnimplemented();
9265 }
9266
9267 Inst_VOP1__V_FRACT_F16::Inst_VOP1__V_FRACT_F16(InFmt_VOP1 *iFmt)
9268 : Inst_VOP1(iFmt, "v_fract_f16")
9269 {
9270 setFlag(ALU);
9271 setFlag(F16);
9272 } // Inst_VOP1__V_FRACT_F16
9273
9274 Inst_VOP1__V_FRACT_F16::~Inst_VOP1__V_FRACT_F16()
9275 {
9276 } // ~Inst_VOP1__V_FRACT_F16
9277
9278 // D.f16 = S0.f16 + -floor(S0.f16).
9279 void
9280 Inst_VOP1__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
9281 {
9282 panicUnimplemented();
9283 }
9284
9285 Inst_VOP1__V_SIN_F16::Inst_VOP1__V_SIN_F16(InFmt_VOP1 *iFmt)
9286 : Inst_VOP1(iFmt, "v_sin_f16")
9287 {
9288 setFlag(ALU);
9289 setFlag(F16);
9290 } // Inst_VOP1__V_SIN_F16
9291
9292 Inst_VOP1__V_SIN_F16::~Inst_VOP1__V_SIN_F16()
9293 {
9294 } // ~Inst_VOP1__V_SIN_F16
9295
9296 // D.f16 = sin(S0.f16 * 2 * PI).
9297 void
9298 Inst_VOP1__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
9299 {
9300 panicUnimplemented();
9301 }
9302
9303 Inst_VOP1__V_COS_F16::Inst_VOP1__V_COS_F16(InFmt_VOP1 *iFmt)
9304 : Inst_VOP1(iFmt, "v_cos_f16")
9305 {
9306 setFlag(ALU);
9307 setFlag(F16);
9308 } // Inst_VOP1__V_COS_F16
9309
9310 Inst_VOP1__V_COS_F16::~Inst_VOP1__V_COS_F16()
9311 {
9312 } // ~Inst_VOP1__V_COS_F16
9313
9314 // D.f16 = cos(S0.f16 * 2 * PI).
9315 void
9316 Inst_VOP1__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
9317 {
9318 panicUnimplemented();
9319 }
9320
9321 Inst_VOP1__V_EXP_LEGACY_F32::Inst_VOP1__V_EXP_LEGACY_F32(InFmt_VOP1 *iFmt)
9322 : Inst_VOP1(iFmt, "v_exp_legacy_f32")
9323 {
9324 setFlag(ALU);
9325 setFlag(F32);
9326 } // Inst_VOP1__V_EXP_LEGACY_F32
9327
9328 Inst_VOP1__V_EXP_LEGACY_F32::~Inst_VOP1__V_EXP_LEGACY_F32()
9329 {
9330 } // ~Inst_VOP1__V_EXP_LEGACY_F32
9331
9332 // D.f = pow(2.0, S0.f)
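    // The legacy variant exists for compatibility with older hardware;
    // any difference from v_exp_f32 in edge-case handling is not
    // modeled, so the implementation is identical.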
9333 void
9334 Inst_VOP1__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
9335 {
9336 Wavefront *wf = gpuDynInst->wavefront();
9337 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
9338 VecOperandF32 vdst(gpuDynInst, instData.VDST);
9339
9340 src.readSrc();
9341
9342 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9343 if (wf->execMask(lane)) {
9344 vdst[lane] = std::pow(2.0, src[lane]);
9345 }
9346 }
9347
9348 vdst.write();
9349 }
9350
9351 Inst_VOP1__V_LOG_LEGACY_F32::Inst_VOP1__V_LOG_LEGACY_F32(InFmt_VOP1 *iFmt)
9352 : Inst_VOP1(iFmt, "v_log_legacy_f32")
9353 {
9354 setFlag(ALU);
9355 setFlag(F32);
9356 } // Inst_VOP1__V_LOG_LEGACY_F32
9357
9358 Inst_VOP1__V_LOG_LEGACY_F32::~Inst_VOP1__V_LOG_LEGACY_F32()
9359 {
9360 } // ~Inst_VOP1__V_LOG_LEGACY_F32
9361
9362 // D.f = log2(S0.f).
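    // As with v_exp_legacy_f32, legacy edge-case behavior is not
    // modeled; the implementation matches v_log_f32.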
9363 void
9364 Inst_VOP1__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
9365 {
9366 Wavefront *wf = gpuDynInst->wavefront();
9367 ConstVecOperandF32 src(gpuDynInst, instData.SRC0);
9368 VecOperandF32 vdst(gpuDynInst, instData.VDST);
9369
9370 src.readSrc();
9371
9372 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9373 if (wf->execMask(lane)) {
9374 vdst[lane] = std::log2(src[lane]);
9375 }
9376 }
9377
9378 vdst.write();
9379 }
9380
9381 Inst_VOPC__V_CMP_CLASS_F32::Inst_VOPC__V_CMP_CLASS_F32(InFmt_VOPC *iFmt)
9382 : Inst_VOPC(iFmt, "v_cmp_class_f32")
9383 {
9384 setFlag(ALU);
9385 setFlag(F32);
9386 } // Inst_VOPC__V_CMP_CLASS_F32
9387
9388 Inst_VOPC__V_CMP_CLASS_F32::~Inst_VOPC__V_CMP_CLASS_F32()
9389 {
9390 } // ~Inst_VOPC__V_CMP_CLASS_F32
9391
9392 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
9393 // The function reports true if the floating point value is any of the
9394 // numeric types selected in S1.u according to the following list:
9395 // S1.u[0] -- value is a signaling NaN.
9396 // S1.u[1] -- value is a quiet NaN.
9397 // S1.u[2] -- value is negative infinity.
9398 // S1.u[3] -- value is a negative normal value.
9399 // S1.u[4] -- value is a negative denormal value.
9400 // S1.u[5] -- value is negative zero.
9401 // S1.u[6] -- value is positive zero.
9402 // S1.u[7] -- value is a positive denormal value.
9403 // S1.u[8] -- value is a positive normal value.
9404 // S1.u[9] -- value is positive infinity.
9405 void
9406 Inst_VOPC__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
9407 {
9408 Wavefront *wf = gpuDynInst->wavefront();
9409 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
9410 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9412
9413 src0.readSrc();
9414 src1.read();
9415
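        // For each active lane, set that lane's VCC bit when S0 belongs
        // to any of the numeric classes selected by the mask in S1.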
9416 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9417 if (wf->execMask(lane)) {
9418 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9419 // is NaN
9420 if (std::isnan(src0[lane])) {
9421 vcc.setBit(lane, 1);
9422 continue;
9423 }
9424 }
9425 if (bits(src1[lane], 2)) {
9426 // is -infinity
9427 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9428 vcc.setBit(lane, 1);
9429 continue;
9430 }
9431 }
9432 if (bits(src1[lane], 3)) {
9433 // is -normal
9434 if (std::isnormal(src0[lane])
9435 && std::signbit(src0[lane])) {
9436 vcc.setBit(lane, 1);
9437 continue;
9438 }
9439 }
9440 if (bits(src1[lane], 4)) {
9441 // is -denormal
9442 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9443 && std::signbit(src0[lane])) {
9444 vcc.setBit(lane, 1);
9445 continue;
9446 }
9447 }
9448 if (bits(src1[lane], 5)) {
9449 // is -zero
9450 if (std::fpclassify(src0[lane]) == FP_ZERO
9451 && std::signbit(src0[lane])) {
9452 vcc.setBit(lane, 1);
9453 continue;
9454 }
9455 }
9456 if (bits(src1[lane], 6)) {
9457 // is +zero
9458 if (std::fpclassify(src0[lane]) == FP_ZERO
9459 && !std::signbit(src0[lane])) {
9460 vcc.setBit(lane, 1);
9461 continue;
9462 }
9463 }
9464 if (bits(src1[lane], 7)) {
9465 // is +denormal
9466 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9467 && !std::signbit(src0[lane])) {
9468 vcc.setBit(lane, 1);
9469 continue;
9470 }
9471 }
9472 if (bits(src1[lane], 8)) {
9473 // is +normal
9474 if (std::isnormal(src0[lane])
9475 && !std::signbit(src0[lane])) {
9476 vcc.setBit(lane, 1);
9477 continue;
9478 }
9479 }
9480 if (bits(src1[lane], 9)) {
9481 // is +infinity
9482 if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
9483 vcc.setBit(lane, 1);
9484 continue;
9485 }
9486 }
9487 }
9488 }
9489
9490 vcc.write();
9491 }
9492
9493 Inst_VOPC__V_CMPX_CLASS_F32::Inst_VOPC__V_CMPX_CLASS_F32(InFmt_VOPC *iFmt)
9494 : Inst_VOPC(iFmt, "v_cmpx_class_f32")
9495 {
9496 setFlag(ALU);
9497 setFlag(F32);
9498 } // Inst_VOPC__V_CMPX_CLASS_F32
9499
9500 Inst_VOPC__V_CMPX_CLASS_F32::~Inst_VOPC__V_CMPX_CLASS_F32()
9501 {
9502 } // ~Inst_VOPC__V_CMPX_CLASS_F32
9503
9504 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.f. The function reports true if the floating point value is any of
9506 // the numeric types selected in S1.u according to the following list:
9507 // S1.u[0] -- value is a signaling NaN.
9508 // S1.u[1] -- value is a quiet NaN.
9509 // S1.u[2] -- value is negative infinity.
9510 // S1.u[3] -- value is a negative normal value.
9511 // S1.u[4] -- value is a negative denormal value.
9512 // S1.u[5] -- value is negative zero.
9513 // S1.u[6] -- value is positive zero.
9514 // S1.u[7] -- value is a positive denormal value.
9515 // S1.u[8] -- value is a positive normal value.
9516 // S1.u[9] -- value is positive infinity.
9517 void
9518 Inst_VOPC__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
9519 {
9520 Wavefront *wf = gpuDynInst->wavefront();
9521 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
9522 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9524
9525 src0.readSrc();
9526 src1.read();
9527
9528 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9529 if (wf->execMask(lane)) {
9530 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9531 // is NaN
9532 if (std::isnan(src0[lane])) {
9533 vcc.setBit(lane, 1);
9534 continue;
9535 }
9536 }
9537 if (bits(src1[lane], 2)) {
9538 // is -infinity
9539 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9540 vcc.setBit(lane, 1);
9541 continue;
9542 }
9543 }
9544 if (bits(src1[lane], 3)) {
9545 // is -normal
9546 if (std::isnormal(src0[lane])
9547 && std::signbit(src0[lane])) {
9548 vcc.setBit(lane, 1);
9549 continue;
9550 }
9551 }
9552 if (bits(src1[lane], 4)) {
9553 // is -denormal
9554 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9555 && std::signbit(src0[lane])) {
9556 vcc.setBit(lane, 1);
9557 continue;
9558 }
9559 }
9560 if (bits(src1[lane], 5)) {
9561 // is -zero
9562 if (std::fpclassify(src0[lane]) == FP_ZERO
9563 && std::signbit(src0[lane])) {
9564 vcc.setBit(lane, 1);
9565 continue;
9566 }
9567 }
9568 if (bits(src1[lane], 6)) {
9569 // is +zero
9570 if (std::fpclassify(src0[lane]) == FP_ZERO
9571 && !std::signbit(src0[lane])) {
9572 vcc.setBit(lane, 1);
9573 continue;
9574 }
9575 }
9576 if (bits(src1[lane], 7)) {
9577 // is +denormal
9578 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9579 && !std::signbit(src0[lane])) {
9580 vcc.setBit(lane, 1);
9581 continue;
9582 }
9583 }
9584 if (bits(src1[lane], 8)) {
9585 // is +normal
9586 if (std::isnormal(src0[lane])
9587 && !std::signbit(src0[lane])) {
9588 vcc.setBit(lane, 1);
9589 continue;
9590 }
9591 }
9592 if (bits(src1[lane], 9)) {
9593 // is +infinity
9594 if (std::isinf(src0[lane]) && !std::signbit(src0[lane])) {
9595 vcc.setBit(lane, 1);
9596 continue;
9597 }
9598 }
9599 }
9600 }
9601
9602 vcc.write();
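        // The CMPX variant also copies the comparison result into EXEC.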
9603 wf->execMask() = vcc.rawData();
9604 }
9605
9606 Inst_VOPC__V_CMP_CLASS_F64::Inst_VOPC__V_CMP_CLASS_F64(InFmt_VOPC *iFmt)
9607 : Inst_VOPC(iFmt, "v_cmp_class_f64")
9608 {
9609 setFlag(ALU);
9610 setFlag(F64);
9611 } // Inst_VOPC__V_CMP_CLASS_F64
9612
9613 Inst_VOPC__V_CMP_CLASS_F64::~Inst_VOPC__V_CMP_CLASS_F64()
9614 {
9615 } // ~Inst_VOPC__V_CMP_CLASS_F64
9616
9617 // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
9618 // The function reports true if the floating point value is any of the
9619 // numeric types selected in S1.u according to the following list:
9620 // S1.u[0] -- value is a signaling NaN.
9621 // S1.u[1] -- value is a quiet NaN.
9622 // S1.u[2] -- value is negative infinity.
9623 // S1.u[3] -- value is a negative normal value.
9624 // S1.u[4] -- value is a negative denormal value.
9625 // S1.u[5] -- value is negative zero.
9626 // S1.u[6] -- value is positive zero.
9627 // S1.u[7] -- value is a positive denormal value.
9628 // S1.u[8] -- value is a positive normal value.
9629 // S1.u[9] -- value is positive infinity.
9630 void
9631 Inst_VOPC__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
9632 {
9633 Wavefront *wf = gpuDynInst->wavefront();
9634 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
9635 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9637
9638 src0.readSrc();
9639 src1.read();
9640
9641 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9642 if (wf->execMask(lane)) {
9643 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9644 // is NaN
9645 if (std::isnan(src0[lane])) {
9646 vcc.setBit(lane, 1);
9647 continue;
9648 }
9649 }
9650 if (bits(src1[lane], 2)) {
9651 // is -infinity
9652 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9653 vcc.setBit(lane, 1);
9654 continue;
9655 }
9656 }
9657 if (bits(src1[lane], 3)) {
9658 // is -normal
9659 if (std::isnormal(src0[lane])
9660 && std::signbit(src0[lane])) {
9661 vcc.setBit(lane, 1);
9662 continue;
9663 }
9664 }
9665 if (bits(src1[lane], 4)) {
9666 // is -denormal
9667 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9668 && std::signbit(src0[lane])) {
9669 vcc.setBit(lane, 1);
9670 continue;
9671 }
9672 }
9673 if (bits(src1[lane], 5)) {
9674 // is -zero
9675 if (std::fpclassify(src0[lane]) == FP_ZERO
9676 && std::signbit(src0[lane])) {
9677 vcc.setBit(lane, 1);
9678 continue;
9679 }
9680 }
9681 if (bits(src1[lane], 6)) {
9682 // is +zero
9683 if (std::fpclassify(src0[lane]) == FP_ZERO
9684 && !std::signbit(src0[lane])) {
9685 vcc.setBit(lane, 1);
9686 continue;
9687 }
9688 }
9689 if (bits(src1[lane], 7)) {
9690 // is +denormal
9691 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9692 && !std::signbit(src0[lane])) {
9693 vcc.setBit(lane, 1);
9694 continue;
9695 }
9696 }
9697 if (bits(src1[lane], 8)) {
9698 // is +normal
9699 if (std::isnormal(src0[lane])
9700 && !std::signbit(src0[lane])) {
9701 vcc.setBit(lane, 1);
9702 continue;
9703 }
9704 }
9705 if (bits(src1[lane], 9)) {
9706 // is +infinity
9707 if (std::isinf(src0[lane])
9708 && !std::signbit(src0[lane])) {
9709 vcc.setBit(lane, 1);
9710 continue;
9711 }
9712 }
9713 }
9714 }
9715
9716 vcc.write();
9717 }
9718
9719 Inst_VOPC__V_CMPX_CLASS_F64::Inst_VOPC__V_CMPX_CLASS_F64(InFmt_VOPC *iFmt)
9720 : Inst_VOPC(iFmt, "v_cmpx_class_f64")
9721 {
9722 setFlag(ALU);
9723 setFlag(F64);
9724 } // Inst_VOPC__V_CMPX_CLASS_F64
9725
9726 Inst_VOPC__V_CMPX_CLASS_F64::~Inst_VOPC__V_CMPX_CLASS_F64()
9727 {
9728 } // ~Inst_VOPC__V_CMPX_CLASS_F64
9729
9730 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
    // S0.d. The function reports true if the floating point value is any of
9732 // the numeric types selected in S1.u according to the following list:
9733 // S1.u[0] -- value is a signaling NaN.
9734 // S1.u[1] -- value is a quiet NaN.
9735 // S1.u[2] -- value is negative infinity.
9736 // S1.u[3] -- value is a negative normal value.
9737 // S1.u[4] -- value is a negative denormal value.
9738 // S1.u[5] -- value is negative zero.
9739 // S1.u[6] -- value is positive zero.
9740 // S1.u[7] -- value is a positive denormal value.
9741 // S1.u[8] -- value is a positive normal value.
9742 // S1.u[9] -- value is positive infinity.
9743 void
9744 Inst_VOPC__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
9745 {
9746 Wavefront *wf = gpuDynInst->wavefront();
9747 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
9748 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
        ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
9750
9751 src0.readSrc();
9752 src1.read();
9753
9754 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
9755 if (wf->execMask(lane)) {
9756 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
9757 // is NaN
9758 if (std::isnan(src0[lane])) {
9759 vcc.setBit(lane, 1);
9760 continue;
9761 }
9762 }
9763 if (bits(src1[lane], 2)) {
9764 // is -infinity
9765 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
9766 vcc.setBit(lane, 1);
9767 continue;
9768 }
9769 }
9770 if (bits(src1[lane], 3)) {
9771 // is -normal
9772 if (std::isnormal(src0[lane])
9773 && std::signbit(src0[lane])) {
9774 vcc.setBit(lane, 1);
9775 continue;
9776 }
9777 }
9778 if (bits(src1[lane], 4)) {
9779 // is -denormal
9780 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9781 && std::signbit(src0[lane])) {
9782 vcc.setBit(lane, 1);
9783 continue;
9784 }
9785 }
9786 if (bits(src1[lane], 5)) {
9787 // is -zero
9788 if (std::fpclassify(src0[lane]) == FP_ZERO
9789 && std::signbit(src0[lane])) {
9790 vcc.setBit(lane, 1);
9791 continue;
9792 }
9793 }
9794 if (bits(src1[lane], 6)) {
9795 // is +zero
9796 if (std::fpclassify(src0[lane]) == FP_ZERO
9797 && !std::signbit(src0[lane])) {
9798 vcc.setBit(lane, 1);
9799 continue;
9800 }
9801 }
9802 if (bits(src1[lane], 7)) {
9803 // is +denormal
9804 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
9805 && !std::signbit(src0[lane])) {
9806 vcc.setBit(lane, 1);
9807 continue;
9808 }
9809 }
9810 if (bits(src1[lane], 8)) {
9811 // is +normal
9812 if (std::isnormal(src0[lane])
9813 && !std::signbit(src0[lane])) {
9814 vcc.setBit(lane, 1);
9815 continue;
9816 }
9817 }
9818 if (bits(src1[lane], 9)) {
9819 // is +infinity
9820 if (std::isinf(src0[lane])
9821 && !std::signbit(src0[lane])) {
9822 vcc.setBit(lane, 1);
9823 continue;
9824 }
9825 }
9826 }
9827 }
9828
9829 vcc.write();
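        // As with the F32 variant, CMPX copies the comparison result
        // into EXEC.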
9830 wf->execMask() = vcc.rawData();
9831 }
9832
9833 Inst_VOPC__V_CMP_CLASS_F16::Inst_VOPC__V_CMP_CLASS_F16(InFmt_VOPC *iFmt)
9834 : Inst_VOPC(iFmt, "v_cmp_class_f16")
9835 {
9836 setFlag(ALU);
9837 setFlag(F16);
9838 } // Inst_VOPC__V_CMP_CLASS_F16
9839
9840 Inst_VOPC__V_CMP_CLASS_F16::~Inst_VOPC__V_CMP_CLASS_F16()
9841 {
9842 } // ~Inst_VOPC__V_CMP_CLASS_F16
9843
9844 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
9845 // The function reports true if the floating point value is any of the
9846 // numeric types selected in S1.u according to the following list:
9847 // S1.u[0] -- value is a signaling NaN.
9848 // S1.u[1] -- value is a quiet NaN.
9849 // S1.u[2] -- value is negative infinity.
9850 // S1.u[3] -- value is a negative normal value.
9851 // S1.u[4] -- value is a negative denormal value.
9852 // S1.u[5] -- value is negative zero.
9853 // S1.u[6] -- value is positive zero.
9854 // S1.u[7] -- value is a positive denormal value.
9855 // S1.u[8] -- value is a positive normal value.
9856 // S1.u[9] -- value is positive infinity.
9857 void
9858 Inst_VOPC__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
9859 {
9860 panicUnimplemented();
9861 }
9862
9863 Inst_VOPC__V_CMPX_CLASS_F16::Inst_VOPC__V_CMPX_CLASS_F16(InFmt_VOPC *iFmt)
9864 : Inst_VOPC(iFmt, "v_cmpx_class_f16")
9865 {
9866 setFlag(ALU);
9867 setFlag(F16);
9868 } // Inst_VOPC__V_CMPX_CLASS_F16
9869
9870 Inst_VOPC__V_CMPX_CLASS_F16::~Inst_VOPC__V_CMPX_CLASS_F16()
9871 {
9872 } // ~Inst_VOPC__V_CMPX_CLASS_F16
9873
9874 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
9875 // S0.f16
9876 // The function reports true if the floating point value is any of the
9877 // numeric types selected in S1.u according to the following list:
9878 // S1.u[0] -- value is a signaling NaN.
9879 // S1.u[1] -- value is a quiet NaN.
9880 // S1.u[2] -- value is negative infinity.
9881 // S1.u[3] -- value is a negative normal value.
9882 // S1.u[4] -- value is a negative denormal value.
9883 // S1.u[5] -- value is negative zero.
9884 // S1.u[6] -- value is positive zero.
9885 // S1.u[7] -- value is a positive denormal value.
9886 // S1.u[8] -- value is a positive normal value.
9887 // S1.u[9] -- value is positive infinity.
9888 void
9889 Inst_VOPC__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
9890 {
9891 panicUnimplemented();
9892 }
9893
9894 Inst_VOPC__V_CMP_F_F16::Inst_VOPC__V_CMP_F_F16(InFmt_VOPC *iFmt)
9895 : Inst_VOPC(iFmt, "v_cmp_f_f16")
9896 {
9897 setFlag(ALU);
9898 setFlag(F16);
9899 } // Inst_VOPC__V_CMP_F_F16
9900
9901 Inst_VOPC__V_CMP_F_F16::~Inst_VOPC__V_CMP_F_F16()
9902 {
9903 } // ~Inst_VOPC__V_CMP_F_F16
9904
9905 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
9906 void
9907 Inst_VOPC__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
9908 {
9909 panicUnimplemented();
9910 }
9911
9912 Inst_VOPC__V_CMP_LT_F16::Inst_VOPC__V_CMP_LT_F16(InFmt_VOPC *iFmt)
9913 : Inst_VOPC(iFmt, "v_cmp_lt_f16")
9914 {
9915 setFlag(ALU);
9916 setFlag(F16);
9917 } // Inst_VOPC__V_CMP_LT_F16
9918
9919 Inst_VOPC__V_CMP_LT_F16::~Inst_VOPC__V_CMP_LT_F16()
9920 {
9921 } // ~Inst_VOPC__V_CMP_LT_F16
9922
9923 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
9924 void
9925 Inst_VOPC__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
9926 {
9927 panicUnimplemented();
9928 }
9929
9930 Inst_VOPC__V_CMP_EQ_F16::Inst_VOPC__V_CMP_EQ_F16(InFmt_VOPC *iFmt)
9931 : Inst_VOPC(iFmt, "v_cmp_eq_f16")
9932 {
9933 setFlag(ALU);
9934 setFlag(F16);
9935 } // Inst_VOPC__V_CMP_EQ_F16
9936
9937 Inst_VOPC__V_CMP_EQ_F16::~Inst_VOPC__V_CMP_EQ_F16()
9938 {
9939 } // ~Inst_VOPC__V_CMP_EQ_F16
9940
9941 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
9942 void
9943 Inst_VOPC__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
9944 {
9945 panicUnimplemented();
9946 }
9947
9948 Inst_VOPC__V_CMP_LE_F16::Inst_VOPC__V_CMP_LE_F16(InFmt_VOPC *iFmt)
9949 : Inst_VOPC(iFmt, "v_cmp_le_f16")
9950 {
9951 setFlag(ALU);
9952 setFlag(F16);
9953 } // Inst_VOPC__V_CMP_LE_F16
9954
9955 Inst_VOPC__V_CMP_LE_F16::~Inst_VOPC__V_CMP_LE_F16()
9956 {
9957 } // ~Inst_VOPC__V_CMP_LE_F16
9958
9959 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
9960 void
9961 Inst_VOPC__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
9962 {
9963 panicUnimplemented();
9964 }
9965
9966 Inst_VOPC__V_CMP_GT_F16::Inst_VOPC__V_CMP_GT_F16(InFmt_VOPC *iFmt)
9967 : Inst_VOPC(iFmt, "v_cmp_gt_f16")
9968 {
9969 setFlag(ALU);
9970 setFlag(F16);
9971 } // Inst_VOPC__V_CMP_GT_F16
9972
9973 Inst_VOPC__V_CMP_GT_F16::~Inst_VOPC__V_CMP_GT_F16()
9974 {
9975 } // ~Inst_VOPC__V_CMP_GT_F16
9976
9977 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
9978 void
9979 Inst_VOPC__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
9980 {
9981 panicUnimplemented();
9982 }
9983
9984 Inst_VOPC__V_CMP_LG_F16::Inst_VOPC__V_CMP_LG_F16(InFmt_VOPC *iFmt)
9985 : Inst_VOPC(iFmt, "v_cmp_lg_f16")
9986 {
9987 setFlag(ALU);
9988 setFlag(F16);
9989 } // Inst_VOPC__V_CMP_LG_F16
9990
9991 Inst_VOPC__V_CMP_LG_F16::~Inst_VOPC__V_CMP_LG_F16()
9992 {
9993 } // ~Inst_VOPC__V_CMP_LG_F16
9994
9995 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
9996 void
9997 Inst_VOPC__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
9998 {
9999 panicUnimplemented();
10000 }
10001
10002 Inst_VOPC__V_CMP_GE_F16::Inst_VOPC__V_CMP_GE_F16(InFmt_VOPC *iFmt)
10003 : Inst_VOPC(iFmt, "v_cmp_ge_f16")
10004 {
10005 setFlag(ALU);
10006 setFlag(F16);
10007 } // Inst_VOPC__V_CMP_GE_F16
10008
10009 Inst_VOPC__V_CMP_GE_F16::~Inst_VOPC__V_CMP_GE_F16()
10010 {
10011 } // ~Inst_VOPC__V_CMP_GE_F16
10012
10013 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
10014 void
10015 Inst_VOPC__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
10016 {
10017 panicUnimplemented();
10018 }
10019
10020 Inst_VOPC__V_CMP_O_F16::Inst_VOPC__V_CMP_O_F16(InFmt_VOPC *iFmt)
10021 : Inst_VOPC(iFmt, "v_cmp_o_f16")
10022 {
10023 setFlag(ALU);
10024 setFlag(F16);
10025 } // Inst_VOPC__V_CMP_O_F16
10026
10027 Inst_VOPC__V_CMP_O_F16::~Inst_VOPC__V_CMP_O_F16()
10028 {
10029 } // ~Inst_VOPC__V_CMP_O_F16
10030
10031 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
10032 void
10033 Inst_VOPC__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
10034 {
10035 panicUnimplemented();
10036 }
10037
10038 Inst_VOPC__V_CMP_U_F16::Inst_VOPC__V_CMP_U_F16(InFmt_VOPC *iFmt)
10039 : Inst_VOPC(iFmt, "v_cmp_u_f16")
10040 {
10041 setFlag(ALU);
10042 setFlag(F16);
10043 } // Inst_VOPC__V_CMP_U_F16
10044
10045 Inst_VOPC__V_CMP_U_F16::~Inst_VOPC__V_CMP_U_F16()
10046 {
10047 } // ~Inst_VOPC__V_CMP_U_F16
10048
10049 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
10050 void
10051 Inst_VOPC__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
10052 {
10053 panicUnimplemented();
10054 }
10055
10056 Inst_VOPC__V_CMP_NGE_F16::Inst_VOPC__V_CMP_NGE_F16(InFmt_VOPC *iFmt)
10057 : Inst_VOPC(iFmt, "v_cmp_nge_f16")
10058 {
10059 setFlag(ALU);
10060 setFlag(F16);
10061 } // Inst_VOPC__V_CMP_NGE_F16
10062
10063 Inst_VOPC__V_CMP_NGE_F16::~Inst_VOPC__V_CMP_NGE_F16()
10064 {
10065 } // ~Inst_VOPC__V_CMP_NGE_F16
10066
10067 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
10068 void
10069 Inst_VOPC__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
10070 {
10071 panicUnimplemented();
10072 }
10073
10074 Inst_VOPC__V_CMP_NLG_F16::Inst_VOPC__V_CMP_NLG_F16(InFmt_VOPC *iFmt)
10075 : Inst_VOPC(iFmt, "v_cmp_nlg_f16")
10076 {
10077 setFlag(ALU);
10078 setFlag(F16);
10079 } // Inst_VOPC__V_CMP_NLG_F16
10080
10081 Inst_VOPC__V_CMP_NLG_F16::~Inst_VOPC__V_CMP_NLG_F16()
10082 {
10083 } // ~Inst_VOPC__V_CMP_NLG_F16
10084
10085 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
10086 void
10087 Inst_VOPC__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
10088 {
10089 panicUnimplemented();
10090 }
10091
10092 Inst_VOPC__V_CMP_NGT_F16::Inst_VOPC__V_CMP_NGT_F16(InFmt_VOPC *iFmt)
10093 : Inst_VOPC(iFmt, "v_cmp_ngt_f16")
10094 {
10095 setFlag(ALU);
10096 setFlag(F16);
10097 } // Inst_VOPC__V_CMP_NGT_F16
10098
10099 Inst_VOPC__V_CMP_NGT_F16::~Inst_VOPC__V_CMP_NGT_F16()
10100 {
10101 } // ~Inst_VOPC__V_CMP_NGT_F16
10102
10103 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
10104 void
10105 Inst_VOPC__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
10106 {
10107 panicUnimplemented();
10108 }
10109
10110 Inst_VOPC__V_CMP_NLE_F16::Inst_VOPC__V_CMP_NLE_F16(InFmt_VOPC *iFmt)
10111 : Inst_VOPC(iFmt, "v_cmp_nle_f16")
10112 {
10113 setFlag(ALU);
10114 setFlag(F16);
10115 } // Inst_VOPC__V_CMP_NLE_F16
10116
10117 Inst_VOPC__V_CMP_NLE_F16::~Inst_VOPC__V_CMP_NLE_F16()
10118 {
10119 } // ~Inst_VOPC__V_CMP_NLE_F16
10120
10121 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
10122 void
10123 Inst_VOPC__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
10124 {
10125 panicUnimplemented();
10126 }
10127
10128 Inst_VOPC__V_CMP_NEQ_F16::Inst_VOPC__V_CMP_NEQ_F16(InFmt_VOPC *iFmt)
10129 : Inst_VOPC(iFmt, "v_cmp_neq_f16")
10130 {
10131 setFlag(ALU);
10132 setFlag(F16);
10133 } // Inst_VOPC__V_CMP_NEQ_F16
10134
10135 Inst_VOPC__V_CMP_NEQ_F16::~Inst_VOPC__V_CMP_NEQ_F16()
10136 {
10137 } // ~Inst_VOPC__V_CMP_NEQ_F16
10138
10139 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
10140 void
10141 Inst_VOPC__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
10142 {
10143 panicUnimplemented();
10144 }
10145
10146 Inst_VOPC__V_CMP_NLT_F16::Inst_VOPC__V_CMP_NLT_F16(InFmt_VOPC *iFmt)
10147 : Inst_VOPC(iFmt, "v_cmp_nlt_f16")
10148 {
10149 setFlag(ALU);
10150 setFlag(F16);
10151 } // Inst_VOPC__V_CMP_NLT_F16
10152
10153 Inst_VOPC__V_CMP_NLT_F16::~Inst_VOPC__V_CMP_NLT_F16()
10154 {
10155 } // ~Inst_VOPC__V_CMP_NLT_F16
10156
10157 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
10158 void
10159 Inst_VOPC__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
10160 {
10161 panicUnimplemented();
10162 }
10163
10164 Inst_VOPC__V_CMP_TRU_F16::Inst_VOPC__V_CMP_TRU_F16(InFmt_VOPC *iFmt)
10165 : Inst_VOPC(iFmt, "v_cmp_tru_f16")
10166 {
10167 setFlag(ALU);
10168 setFlag(F16);
10169 } // Inst_VOPC__V_CMP_TRU_F16
10170
10171 Inst_VOPC__V_CMP_TRU_F16::~Inst_VOPC__V_CMP_TRU_F16()
10172 {
10173 } // ~Inst_VOPC__V_CMP_TRU_F16
10174
10175 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
10176 void
10177 Inst_VOPC__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
10178 {
10179 panicUnimplemented();
10180 }
10181
10182 Inst_VOPC__V_CMPX_F_F16::Inst_VOPC__V_CMPX_F_F16(InFmt_VOPC *iFmt)
10183 : Inst_VOPC(iFmt, "v_cmpx_f_f16")
10184 {
10185 setFlag(ALU);
10186 setFlag(F16);
10187 } // Inst_VOPC__V_CMPX_F_F16
10188
10189 Inst_VOPC__V_CMPX_F_F16::~Inst_VOPC__V_CMPX_F_F16()
10190 {
10191 } // ~Inst_VOPC__V_CMPX_F_F16
10192
10193 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
10194 void
10195 Inst_VOPC__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
10196 {
10197 panicUnimplemented();
10198 }
10199
10200 Inst_VOPC__V_CMPX_LT_F16::Inst_VOPC__V_CMPX_LT_F16(InFmt_VOPC *iFmt)
10201 : Inst_VOPC(iFmt, "v_cmpx_lt_f16")
10202 {
10203 setFlag(ALU);
10204 setFlag(F16);
10205 } // Inst_VOPC__V_CMPX_LT_F16
10206
10207 Inst_VOPC__V_CMPX_LT_F16::~Inst_VOPC__V_CMPX_LT_F16()
10208 {
10209 } // ~Inst_VOPC__V_CMPX_LT_F16
10210
10211 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
10212 void
10213 Inst_VOPC__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
10214 {
10215 panicUnimplemented();
10216 }
10217
10218 Inst_VOPC__V_CMPX_EQ_F16::Inst_VOPC__V_CMPX_EQ_F16(InFmt_VOPC *iFmt)
10219 : Inst_VOPC(iFmt, "v_cmpx_eq_f16")
10220 {
10221 setFlag(ALU);
10222 setFlag(F16);
10223 } // Inst_VOPC__V_CMPX_EQ_F16
10224
10225 Inst_VOPC__V_CMPX_EQ_F16::~Inst_VOPC__V_CMPX_EQ_F16()
10226 {
10227 } // ~Inst_VOPC__V_CMPX_EQ_F16
10228
10229 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
10230 void
10231 Inst_VOPC__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
10232 {
10233 panicUnimplemented();
10234 }
10235
10236 Inst_VOPC__V_CMPX_LE_F16::Inst_VOPC__V_CMPX_LE_F16(InFmt_VOPC *iFmt)
10237 : Inst_VOPC(iFmt, "v_cmpx_le_f16")
10238 {
10239 setFlag(ALU);
10240 setFlag(F16);
10241 } // Inst_VOPC__V_CMPX_LE_F16
10242
10243 Inst_VOPC__V_CMPX_LE_F16::~Inst_VOPC__V_CMPX_LE_F16()
10244 {
10245 } // ~Inst_VOPC__V_CMPX_LE_F16
10246
10247 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
10248 void
10249 Inst_VOPC__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
10250 {
10251 panicUnimplemented();
10252 }
10253
10254 Inst_VOPC__V_CMPX_GT_F16::Inst_VOPC__V_CMPX_GT_F16(InFmt_VOPC *iFmt)
10255 : Inst_VOPC(iFmt, "v_cmpx_gt_f16")
10256 {
10257 setFlag(ALU);
10258 setFlag(F16);
10259 } // Inst_VOPC__V_CMPX_GT_F16
10260
10261 Inst_VOPC__V_CMPX_GT_F16::~Inst_VOPC__V_CMPX_GT_F16()
10262 {
10263 } // ~Inst_VOPC__V_CMPX_GT_F16
10264
10265 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
10266 void
10267 Inst_VOPC__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
10268 {
10269 panicUnimplemented();
10270 }
10271
10272 Inst_VOPC__V_CMPX_LG_F16::Inst_VOPC__V_CMPX_LG_F16(InFmt_VOPC *iFmt)
10273 : Inst_VOPC(iFmt, "v_cmpx_lg_f16")
10274 {
10275 setFlag(ALU);
10276 setFlag(F16);
10277 } // Inst_VOPC__V_CMPX_LG_F16
10278
10279 Inst_VOPC__V_CMPX_LG_F16::~Inst_VOPC__V_CMPX_LG_F16()
10280 {
10281 } // ~Inst_VOPC__V_CMPX_LG_F16
10282
10283 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
10284 void
10285 Inst_VOPC__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
10286 {
10287 panicUnimplemented();
10288 }
10289
10290 Inst_VOPC__V_CMPX_GE_F16::Inst_VOPC__V_CMPX_GE_F16(InFmt_VOPC *iFmt)
10291 : Inst_VOPC(iFmt, "v_cmpx_ge_f16")
10292 {
10293 setFlag(ALU);
10294 setFlag(F16);
10295 } // Inst_VOPC__V_CMPX_GE_F16
10296
10297 Inst_VOPC__V_CMPX_GE_F16::~Inst_VOPC__V_CMPX_GE_F16()
10298 {
10299 } // ~Inst_VOPC__V_CMPX_GE_F16
10300
10301 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
10302 void
10303 Inst_VOPC__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
10304 {
10305 panicUnimplemented();
10306 }
10307
10308 Inst_VOPC__V_CMPX_O_F16::Inst_VOPC__V_CMPX_O_F16(InFmt_VOPC *iFmt)
10309 : Inst_VOPC(iFmt, "v_cmpx_o_f16")
10310 {
10311 setFlag(ALU);
10312 setFlag(F16);
10313 } // Inst_VOPC__V_CMPX_O_F16
10314
10315 Inst_VOPC__V_CMPX_O_F16::~Inst_VOPC__V_CMPX_O_F16()
10316 {
10317 } // ~Inst_VOPC__V_CMPX_O_F16
10318
10319 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
10320 // encoding.
10321 void
10322 Inst_VOPC__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
10323 {
10324 panicUnimplemented();
10325 }
10326
10327 Inst_VOPC__V_CMPX_U_F16::Inst_VOPC__V_CMPX_U_F16(InFmt_VOPC *iFmt)
10328 : Inst_VOPC(iFmt, "v_cmpx_u_f16")
10329 {
10330 setFlag(ALU);
10331 setFlag(F16);
10332 } // Inst_VOPC__V_CMPX_U_F16
10333
10334 Inst_VOPC__V_CMPX_U_F16::~Inst_VOPC__V_CMPX_U_F16()
10335 {
10336 } // ~Inst_VOPC__V_CMPX_U_F16
10337
10338 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
10339 // encoding.
10340 void
10341 Inst_VOPC__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
10342 {
10343 panicUnimplemented();
10344 }
10345
10346 Inst_VOPC__V_CMPX_NGE_F16::Inst_VOPC__V_CMPX_NGE_F16(InFmt_VOPC *iFmt)
10347 : Inst_VOPC(iFmt, "v_cmpx_nge_f16")
10348 {
10349 setFlag(ALU);
10350 setFlag(F16);
10351 } // Inst_VOPC__V_CMPX_NGE_F16
10352
10353 Inst_VOPC__V_CMPX_NGE_F16::~Inst_VOPC__V_CMPX_NGE_F16()
10354 {
10355 } // ~Inst_VOPC__V_CMPX_NGE_F16
10356
10357 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
10358 void
10359 Inst_VOPC__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
10360 {
10361 panicUnimplemented();
10362 }
10363
10364 Inst_VOPC__V_CMPX_NLG_F16::Inst_VOPC__V_CMPX_NLG_F16(InFmt_VOPC *iFmt)
10365 : Inst_VOPC(iFmt, "v_cmpx_nlg_f16")
10366 {
10367 setFlag(ALU);
10368 setFlag(F16);
10369 } // Inst_VOPC__V_CMPX_NLG_F16
10370
10371 Inst_VOPC__V_CMPX_NLG_F16::~Inst_VOPC__V_CMPX_NLG_F16()
10372 {
10373 } // ~Inst_VOPC__V_CMPX_NLG_F16
10374
10375 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
10376 void
10377 Inst_VOPC__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
10378 {
10379 panicUnimplemented();
10380 }
10381
10382 Inst_VOPC__V_CMPX_NGT_F16::Inst_VOPC__V_CMPX_NGT_F16(InFmt_VOPC *iFmt)
10383 : Inst_VOPC(iFmt, "v_cmpx_ngt_f16")
10384 {
10385 setFlag(ALU);
10386 setFlag(F16);
10387 } // Inst_VOPC__V_CMPX_NGT_F16
10388
10389 Inst_VOPC__V_CMPX_NGT_F16::~Inst_VOPC__V_CMPX_NGT_F16()
10390 {
10391 } // ~Inst_VOPC__V_CMPX_NGT_F16
10392
10393 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
10394 void
10395 Inst_VOPC__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
10396 {
10397 panicUnimplemented();
10398 }
10399
10400 Inst_VOPC__V_CMPX_NLE_F16::Inst_VOPC__V_CMPX_NLE_F16(InFmt_VOPC *iFmt)
10401 : Inst_VOPC(iFmt, "v_cmpx_nle_f16")
10402 {
10403 setFlag(ALU);
10404 setFlag(F16);
10405 } // Inst_VOPC__V_CMPX_NLE_F16
10406
10407 Inst_VOPC__V_CMPX_NLE_F16::~Inst_VOPC__V_CMPX_NLE_F16()
10408 {
10409 } // ~Inst_VOPC__V_CMPX_NLE_F16
10410
10411 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
10412 void
10413 Inst_VOPC__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
10414 {
10415 panicUnimplemented();
10416 }
10417
10418 Inst_VOPC__V_CMPX_NEQ_F16::Inst_VOPC__V_CMPX_NEQ_F16(InFmt_VOPC *iFmt)
10419 : Inst_VOPC(iFmt, "v_cmpx_neq_f16")
10420 {
10421 setFlag(ALU);
10422 setFlag(F16);
10423 } // Inst_VOPC__V_CMPX_NEQ_F16
10424
10425 Inst_VOPC__V_CMPX_NEQ_F16::~Inst_VOPC__V_CMPX_NEQ_F16()
10426 {
10427 } // ~Inst_VOPC__V_CMPX_NEQ_F16
10428
10429 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
10430 void
10431 Inst_VOPC__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
10432 {
10433 panicUnimplemented();
10434 }
10435
10436 Inst_VOPC__V_CMPX_NLT_F16::Inst_VOPC__V_CMPX_NLT_F16(InFmt_VOPC *iFmt)
10437 : Inst_VOPC(iFmt, "v_cmpx_nlt_f16")
10438 {
10439 setFlag(ALU);
10440 setFlag(F16);
10441 } // Inst_VOPC__V_CMPX_NLT_F16
10442
10443 Inst_VOPC__V_CMPX_NLT_F16::~Inst_VOPC__V_CMPX_NLT_F16()
10444 {
10445 } // ~Inst_VOPC__V_CMPX_NLT_F16
10446
10447 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
10448 void
10449 Inst_VOPC__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
10450 {
10451 panicUnimplemented();
10452 }
10453
10454 Inst_VOPC__V_CMPX_TRU_F16::Inst_VOPC__V_CMPX_TRU_F16(InFmt_VOPC *iFmt)
10455 : Inst_VOPC(iFmt, "v_cmpx_tru_f16")
10456 {
10457 setFlag(ALU);
10458 setFlag(F16);
10459 } // Inst_VOPC__V_CMPX_TRU_F16
10460
10461 Inst_VOPC__V_CMPX_TRU_F16::~Inst_VOPC__V_CMPX_TRU_F16()
10462 {
10463 } // ~Inst_VOPC__V_CMPX_TRU_F16
10464
10465 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
10466 void
10467 Inst_VOPC__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
10468 {
10469 panicUnimplemented();
10470 }
10471
10472 Inst_VOPC__V_CMP_F_F32::Inst_VOPC__V_CMP_F_F32(InFmt_VOPC *iFmt)
10473 : Inst_VOPC(iFmt, "v_cmp_f_f32")
10474 {
10475 setFlag(ALU);
10476 setFlag(F32);
10477 } // Inst_VOPC__V_CMP_F_F32
10478
10479 Inst_VOPC__V_CMP_F_F32::~Inst_VOPC__V_CMP_F_F32()
10480 {
10481 } // ~Inst_VOPC__V_CMP_F_F32
10482
10483 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
10484 void
10485 Inst_VOPC__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
10486 {
10487 Wavefront *wf = gpuDynInst->wavefront();
10488 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10489
10490 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10491 if (wf->execMask(lane)) {
10492 vcc.setBit(lane, 0);
10493 }
10494 }
10495
10496 vcc.write();
10497 }
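        // The implemented VOPC compares all follow the pattern above: for
        // each lane whose EXEC bit is set, the per-lane predicate result
        // is written into bit <lane> of the 64-bit VCC operand with
        // setBit(), and vcc.write() then commits the whole mask at once.
        // Bits for inactive lanes are left untouched, so even the
        // constant compares (v_cmp_f_*, v_cmp_tru_*) only update the
        // active lanes' VCC bits.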
10498
10499 Inst_VOPC__V_CMP_LT_F32::Inst_VOPC__V_CMP_LT_F32(InFmt_VOPC *iFmt)
10500 : Inst_VOPC(iFmt, "v_cmp_lt_f32")
10501 {
10502 setFlag(ALU);
10503 setFlag(F32);
10504 } // Inst_VOPC__V_CMP_LT_F32
10505
10506 Inst_VOPC__V_CMP_LT_F32::~Inst_VOPC__V_CMP_LT_F32()
10507 {
10508 } // ~Inst_VOPC__V_CMP_LT_F32
10509
10510 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
10511 void
10512 Inst_VOPC__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
10513 {
10514 Wavefront *wf = gpuDynInst->wavefront();
10515 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10516 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10517 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10518
10519 src0.readSrc();
10520 src1.read();
10521
10522 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10523 if (wf->execMask(lane)) {
10524 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
10525 }
10526 }
10527
10528 vcc.write();
10529 }
10530
10531 Inst_VOPC__V_CMP_EQ_F32::Inst_VOPC__V_CMP_EQ_F32(InFmt_VOPC *iFmt)
10532 : Inst_VOPC(iFmt, "v_cmp_eq_f32")
10533 {
10534 setFlag(ALU);
10535 setFlag(F32);
10536 } // Inst_VOPC__V_CMP_EQ_F32
10537
10538 Inst_VOPC__V_CMP_EQ_F32::~Inst_VOPC__V_CMP_EQ_F32()
10539 {
10540 } // ~Inst_VOPC__V_CMP_EQ_F32
10541
10542 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
10543 void
10544 Inst_VOPC__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
10545 {
10546 Wavefront *wf = gpuDynInst->wavefront();
10547 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10548 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10549 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10550
10551 src0.readSrc();
10552 src1.read();
10553
10554 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10555 if (wf->execMask(lane)) {
10556 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
10557 }
10558 }
10559
10560 vcc.write();
10561 }
10562
10563 Inst_VOPC__V_CMP_LE_F32::Inst_VOPC__V_CMP_LE_F32(InFmt_VOPC *iFmt)
10564 : Inst_VOPC(iFmt, "v_cmp_le_f32")
10565 {
10566 setFlag(ALU);
10567 setFlag(F32);
10568 } // Inst_VOPC__V_CMP_LE_F32
10569
10570 Inst_VOPC__V_CMP_LE_F32::~Inst_VOPC__V_CMP_LE_F32()
10571 {
10572 } // ~Inst_VOPC__V_CMP_LE_F32
10573
10574 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
10575 void
10576 Inst_VOPC__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
10577 {
10578 Wavefront *wf = gpuDynInst->wavefront();
10579 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10580 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10581 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10582
10583 src0.readSrc();
10584 src1.read();
10585
10586 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10587 if (wf->execMask(lane)) {
10588 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
10589 }
10590 }
10591
10592 vcc.write();
10593 }
10594
10595 Inst_VOPC__V_CMP_GT_F32::Inst_VOPC__V_CMP_GT_F32(InFmt_VOPC *iFmt)
10596 : Inst_VOPC(iFmt, "v_cmp_gt_f32")
10597 {
10598 setFlag(ALU);
10599 setFlag(F32);
10600 } // Inst_VOPC__V_CMP_GT_F32
10601
10602 Inst_VOPC__V_CMP_GT_F32::~Inst_VOPC__V_CMP_GT_F32()
10603 {
10604 } // ~Inst_VOPC__V_CMP_GT_F32
10605
10606 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
10607 void
10608 Inst_VOPC__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
10609 {
10610 Wavefront *wf = gpuDynInst->wavefront();
10611 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10612 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10613 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10614
10615 src0.readSrc();
10616 src1.read();
10617
10618 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10619 if (wf->execMask(lane)) {
10620 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
10621 }
10622 }
10623
10624 vcc.write();
10625 }
10626
10627 Inst_VOPC__V_CMP_LG_F32::Inst_VOPC__V_CMP_LG_F32(InFmt_VOPC *iFmt)
10628 : Inst_VOPC(iFmt, "v_cmp_lg_f32")
10629 {
10630 setFlag(ALU);
10631 setFlag(F32);
10632 } // Inst_VOPC__V_CMP_LG_F32
10633
10634 Inst_VOPC__V_CMP_LG_F32::~Inst_VOPC__V_CMP_LG_F32()
10635 {
10636 } // ~Inst_VOPC__V_CMP_LG_F32
10637
10638 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
10639 void
10640 Inst_VOPC__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
10641 {
10642 Wavefront *wf = gpuDynInst->wavefront();
10643 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10644 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10645 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10646
10647 src0.readSrc();
10648 src1.read();
10649
10650 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10651 if (wf->execMask(lane)) {
10652 vcc.setBit(lane, (src0[lane] < src1[lane]
10653 || src0[lane] > src1[lane]) ? 1 : 0);
10654 }
10655 }
10656
10657 vcc.write();
10658 }
10659
10660 Inst_VOPC__V_CMP_GE_F32::Inst_VOPC__V_CMP_GE_F32(InFmt_VOPC *iFmt)
10661 : Inst_VOPC(iFmt, "v_cmp_ge_f32")
10662 {
10663 setFlag(ALU);
10664 setFlag(F32);
10665 } // Inst_VOPC__V_CMP_GE_F32
10666
10667 Inst_VOPC__V_CMP_GE_F32::~Inst_VOPC__V_CMP_GE_F32()
10668 {
10669 } // ~Inst_VOPC__V_CMP_GE_F32
10670
10671 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
10672 void
10673 Inst_VOPC__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
10674 {
10675 Wavefront *wf = gpuDynInst->wavefront();
10676 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10677 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10678 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10679
10680 src0.readSrc();
10681 src1.read();
10682
10683 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10684 if (wf->execMask(lane)) {
10685 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
10686 }
10687 }
10688
10689 vcc.write();
10690 }
10691
10692 Inst_VOPC__V_CMP_O_F32::Inst_VOPC__V_CMP_O_F32(InFmt_VOPC *iFmt)
10693 : Inst_VOPC(iFmt, "v_cmp_o_f32")
10694 {
10695 setFlag(ALU);
10696 setFlag(F32);
10697 } // Inst_VOPC__V_CMP_O_F32
10698
10699 Inst_VOPC__V_CMP_O_F32::~Inst_VOPC__V_CMP_O_F32()
10700 {
10701 } // ~Inst_VOPC__V_CMP_O_F32
10702
10703 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
10704 void
10705 Inst_VOPC__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
10706 {
10707 Wavefront *wf = gpuDynInst->wavefront();
10708 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10709 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10710 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10711
10712 src0.readSrc();
10713 src1.read();
10714
10715 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10716 if (wf->execMask(lane)) {
10717 vcc.setBit(lane, (!std::isnan(src0[lane])
10718 && !std::isnan(src1[lane])) ? 1 : 0);
10719 }
10720 }
10721
10722 vcc.write();
10723 }
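        // v_cmp_o_* ("ordered") is true only when neither source is a
        // NaN; the v_cmp_u_* ("unordered") compare below is its
        // complement. Both are implemented here with std::isnan on each
        // lane's source values.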
10724
10725 Inst_VOPC__V_CMP_U_F32::Inst_VOPC__V_CMP_U_F32(InFmt_VOPC *iFmt)
10726 : Inst_VOPC(iFmt, "v_cmp_u_f32")
10727 {
10728 setFlag(ALU);
10729 setFlag(F32);
10730 } // Inst_VOPC__V_CMP_U_F32
10731
10732 Inst_VOPC__V_CMP_U_F32::~Inst_VOPC__V_CMP_U_F32()
10733 {
10734 } // ~Inst_VOPC__V_CMP_U_F32
10735
10736 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
10737 void
10738 Inst_VOPC__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
10739 {
10740 Wavefront *wf = gpuDynInst->wavefront();
10741 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10742 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10743 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10744
10745 src0.readSrc();
10746 src1.read();
10747
10748 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10749 if (wf->execMask(lane)) {
10750 vcc.setBit(lane, (std::isnan(src0[lane])
10751 || std::isnan(src1[lane])) ? 1 : 0);
10752 }
10753 }
10754
10755 vcc.write();
10756 }
10757
10758 Inst_VOPC__V_CMP_NGE_F32::Inst_VOPC__V_CMP_NGE_F32(InFmt_VOPC *iFmt)
10759 : Inst_VOPC(iFmt, "v_cmp_nge_f32")
10760 {
10761 setFlag(ALU);
10762 setFlag(F32);
10763 } // Inst_VOPC__V_CMP_NGE_F32
10764
10765 Inst_VOPC__V_CMP_NGE_F32::~Inst_VOPC__V_CMP_NGE_F32()
10766 {
10767 } // ~Inst_VOPC__V_CMP_NGE_F32
10768
10769 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
10770 void
10771 Inst_VOPC__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
10772 {
10773 Wavefront *wf = gpuDynInst->wavefront();
10774 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10775 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10776 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10777
10778 src0.readSrc();
10779 src1.read();
10780
10781 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10782 if (wf->execMask(lane)) {
10783 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
10784 }
10785 }
10786
10787 vcc.write();
10788 }
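        // The negated compares (nge, nlg, ngt, nle, nlt) negate the whole
        // comparison expression rather than using the opposite operator,
        // which matters for NaN inputs: e.g. !(S0 >= S1) is true when
        // either source is a NaN, whereas (S0 < S1) would be false.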
10789
10790 Inst_VOPC__V_CMP_NLG_F32::Inst_VOPC__V_CMP_NLG_F32(InFmt_VOPC *iFmt)
10791 : Inst_VOPC(iFmt, "v_cmp_nlg_f32")
10792 {
10793 setFlag(ALU);
10794 setFlag(F32);
10795 } // Inst_VOPC__V_CMP_NLG_F32
10796
10797 Inst_VOPC__V_CMP_NLG_F32::~Inst_VOPC__V_CMP_NLG_F32()
10798 {
10799 } // ~Inst_VOPC__V_CMP_NLG_F32
10800
10801 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
10802 void
10803 Inst_VOPC__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
10804 {
10805 Wavefront *wf = gpuDynInst->wavefront();
10806 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10807 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10808 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10809
10810 src0.readSrc();
10811 src1.read();
10812
10813 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10814 if (wf->execMask(lane)) {
10815 vcc.setBit(lane, !(src0[lane] < src1[lane]
10816 || src0[lane] > src1[lane]) ? 1 : 0);
10817 }
10818 }
10819
10820 vcc.write();
10821 }
10822
10823 Inst_VOPC__V_CMP_NGT_F32::Inst_VOPC__V_CMP_NGT_F32(InFmt_VOPC *iFmt)
10824 : Inst_VOPC(iFmt, "v_cmp_ngt_f32")
10825 {
10826 setFlag(ALU);
10827 setFlag(F32);
10828 } // Inst_VOPC__V_CMP_NGT_F32
10829
10830 Inst_VOPC__V_CMP_NGT_F32::~Inst_VOPC__V_CMP_NGT_F32()
10831 {
10832 } // ~Inst_VOPC__V_CMP_NGT_F32
10833
10834 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
10835 void
10836 Inst_VOPC__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
10837 {
10838 Wavefront *wf = gpuDynInst->wavefront();
10839 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10840 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10841 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10842
10843 src0.readSrc();
10844 src1.read();
10845
10846 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10847 if (wf->execMask(lane)) {
10848 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
10849 }
10850 }
10851
10852 vcc.write();
10853 }
10854
10855 Inst_VOPC__V_CMP_NLE_F32::Inst_VOPC__V_CMP_NLE_F32(InFmt_VOPC *iFmt)
10856 : Inst_VOPC(iFmt, "v_cmp_nle_f32")
10857 {
10858 setFlag(ALU);
10859 setFlag(F32);
10860 } // Inst_VOPC__V_CMP_NLE_F32
10861
10862 Inst_VOPC__V_CMP_NLE_F32::~Inst_VOPC__V_CMP_NLE_F32()
10863 {
10864 } // ~Inst_VOPC__V_CMP_NLE_F32
10865
10866 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
10867 void
10868 Inst_VOPC__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
10869 {
10870 Wavefront *wf = gpuDynInst->wavefront();
10871 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10872 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10873 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10874
10875 src0.readSrc();
10876 src1.read();
10877
10878 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10879 if (wf->execMask(lane)) {
10880 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
10881 }
10882 }
10883
10884 vcc.write();
10885 }
10886
10887 Inst_VOPC__V_CMP_NEQ_F32::Inst_VOPC__V_CMP_NEQ_F32(InFmt_VOPC *iFmt)
10888 : Inst_VOPC(iFmt, "v_cmp_neq_f32")
10889 {
10890 setFlag(ALU);
10891 setFlag(F32);
10892 } // Inst_VOPC__V_CMP_NEQ_F32
10893
10894 Inst_VOPC__V_CMP_NEQ_F32::~Inst_VOPC__V_CMP_NEQ_F32()
10895 {
10896 } // ~Inst_VOPC__V_CMP_NEQ_F32
10897
10898 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
10899 void
10900 Inst_VOPC__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
10901 {
10902 Wavefront *wf = gpuDynInst->wavefront();
10903 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10904 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10905 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10906
10907 src0.readSrc();
10908 src1.read();
10909
10910 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10911 if (wf->execMask(lane)) {
10912 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
10913 }
10914 }
10915
10916 vcc.write();
10917 }
10918
10919 Inst_VOPC__V_CMP_NLT_F32::Inst_VOPC__V_CMP_NLT_F32(InFmt_VOPC *iFmt)
10920 : Inst_VOPC(iFmt, "v_cmp_nlt_f32")
10921 {
10922 setFlag(ALU);
10923 setFlag(F32);
10924 } // Inst_VOPC__V_CMP_NLT_F32
10925
10926 Inst_VOPC__V_CMP_NLT_F32::~Inst_VOPC__V_CMP_NLT_F32()
10927 {
10928 } // ~Inst_VOPC__V_CMP_NLT_F32
10929
10930 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
10931 void
10932 Inst_VOPC__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
10933 {
10934 Wavefront *wf = gpuDynInst->wavefront();
10935 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
10936 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
10937 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10938
10939 src0.readSrc();
10940 src1.read();
10941
10942 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10943 if (wf->execMask(lane)) {
10944 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
10945 }
10946 }
10947
10948 vcc.write();
10949 }
10950
10951 Inst_VOPC__V_CMP_TRU_F32::Inst_VOPC__V_CMP_TRU_F32(InFmt_VOPC *iFmt)
10952 : Inst_VOPC(iFmt, "v_cmp_tru_f32")
10953 {
10954 setFlag(ALU);
10955 setFlag(F32);
10956 } // Inst_VOPC__V_CMP_TRU_F32
10957
10958 Inst_VOPC__V_CMP_TRU_F32::~Inst_VOPC__V_CMP_TRU_F32()
10959 {
10960 } // ~Inst_VOPC__V_CMP_TRU_F32
10961
10962 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
10963 void
10964 Inst_VOPC__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
10965 {
10966 Wavefront *wf = gpuDynInst->wavefront();
10967 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10968
10969 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10970 if (wf->execMask(lane)) {
10971 vcc.setBit(lane, 1);
10972 }
10973 }
10974
10975 vcc.write();
10976 }
10977
10978 Inst_VOPC__V_CMPX_F_F32::Inst_VOPC__V_CMPX_F_F32(InFmt_VOPC *iFmt)
10979 : Inst_VOPC(iFmt, "v_cmpx_f_f32")
10980 {
10981 setFlag(ALU);
10982 setFlag(F32);
10983 } // Inst_VOPC__V_CMPX_F_F32
10984
10985 Inst_VOPC__V_CMPX_F_F32::~Inst_VOPC__V_CMPX_F_F32()
10986 {
10987 } // ~Inst_VOPC__V_CMPX_F_F32
10988
10989 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
10990 void
10991 Inst_VOPC__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
10992 {
10993 Wavefront *wf = gpuDynInst->wavefront();
10994 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
10995
10996 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
10997 if (wf->execMask(lane)) {
10998 vcc.setBit(lane, 0);
10999 }
11000 }
11001
11002 vcc.write();
11003 wf->execMask() = vcc.rawData();
11004 }
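        // The v_cmpx_* variants extend the plain compares by also copying
        // the freshly computed VCC mask into the wavefront's EXEC mask
        // (wf->execMask() = vcc.rawData()), so lanes that fail the
        // compare are disabled for subsequent vector instructions.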
11005
11006 Inst_VOPC__V_CMPX_LT_F32::Inst_VOPC__V_CMPX_LT_F32(InFmt_VOPC *iFmt)
11007 : Inst_VOPC(iFmt, "v_cmpx_lt_f32")
11008 {
11009 setFlag(ALU);
11010 setFlag(F32);
11011 } // Inst_VOPC__V_CMPX_LT_F32
11012
11013 Inst_VOPC__V_CMPX_LT_F32::~Inst_VOPC__V_CMPX_LT_F32()
11014 {
11015 } // ~Inst_VOPC__V_CMPX_LT_F32
11016
11017 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
11018 void
11019 Inst_VOPC__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
11020 {
11021 Wavefront *wf = gpuDynInst->wavefront();
11022 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11023 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11024 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11025
11026 src0.readSrc();
11027 src1.read();
11028
11029 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11030 if (wf->execMask(lane)) {
11031 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
11032 }
11033 }
11034
11035 vcc.write();
11036 wf->execMask() = vcc.rawData();
11037 }
11038
11039 Inst_VOPC__V_CMPX_EQ_F32::Inst_VOPC__V_CMPX_EQ_F32(InFmt_VOPC *iFmt)
11040 : Inst_VOPC(iFmt, "v_cmpx_eq_f32")
11041 {
11042 setFlag(ALU);
11043 setFlag(F32);
11044 } // Inst_VOPC__V_CMPX_EQ_F32
11045
11046 Inst_VOPC__V_CMPX_EQ_F32::~Inst_VOPC__V_CMPX_EQ_F32()
11047 {
11048 } // ~Inst_VOPC__V_CMPX_EQ_F32
11049
11050 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
11051 void
11052 Inst_VOPC__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
11053 {
11054 Wavefront *wf = gpuDynInst->wavefront();
11055 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11056 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11057 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11058
11059 src0.readSrc();
11060 src1.read();
11061
11062 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11063 if (wf->execMask(lane)) {
11064 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
11065 }
11066 }
11067
11068 vcc.write();
11069 wf->execMask() = vcc.rawData();
11070 }
11071
11072 Inst_VOPC__V_CMPX_LE_F32::Inst_VOPC__V_CMPX_LE_F32(InFmt_VOPC *iFmt)
11073 : Inst_VOPC(iFmt, "v_cmpx_le_f32")
11074 {
11075 setFlag(ALU);
11076 setFlag(F32);
11077 } // Inst_VOPC__V_CMPX_LE_F32
11078
11079 Inst_VOPC__V_CMPX_LE_F32::~Inst_VOPC__V_CMPX_LE_F32()
11080 {
11081 } // ~Inst_VOPC__V_CMPX_LE_F32
11082
11083 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
11084 void
11085 Inst_VOPC__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
11086 {
11087 Wavefront *wf = gpuDynInst->wavefront();
11088 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11089 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11090 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11091
11092 src0.readSrc();
11093 src1.read();
11094
11095 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11096 if (wf->execMask(lane)) {
11097 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
11098 }
11099 }
11100
11101 vcc.write();
11102 wf->execMask() = vcc.rawData();
11103 }
11104
11105 Inst_VOPC__V_CMPX_GT_F32::Inst_VOPC__V_CMPX_GT_F32(InFmt_VOPC *iFmt)
11106 : Inst_VOPC(iFmt, "v_cmpx_gt_f32")
11107 {
11108 setFlag(ALU);
11109 setFlag(F32);
11110 } // Inst_VOPC__V_CMPX_GT_F32
11111
11112 Inst_VOPC__V_CMPX_GT_F32::~Inst_VOPC__V_CMPX_GT_F32()
11113 {
11114 } // ~Inst_VOPC__V_CMPX_GT_F32
11115
11116 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
11117 void
11118 Inst_VOPC__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
11119 {
11120 Wavefront *wf = gpuDynInst->wavefront();
11121 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11122 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11123 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11124
11125 src0.readSrc();
11126 src1.read();
11127
11128 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11129 if (wf->execMask(lane)) {
11130 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
11131 }
11132 }
11133
11134 vcc.write();
11135 wf->execMask() = vcc.rawData();
11136 }
11137
11138 Inst_VOPC__V_CMPX_LG_F32::Inst_VOPC__V_CMPX_LG_F32(InFmt_VOPC *iFmt)
11139 : Inst_VOPC(iFmt, "v_cmpx_lg_f32")
11140 {
11141 setFlag(ALU);
11142 setFlag(F32);
11143 } // Inst_VOPC__V_CMPX_LG_F32
11144
11145 Inst_VOPC__V_CMPX_LG_F32::~Inst_VOPC__V_CMPX_LG_F32()
11146 {
11147 } // ~Inst_VOPC__V_CMPX_LG_F32
11148
11149 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
11150 void
11151 Inst_VOPC__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
11152 {
11153 Wavefront *wf = gpuDynInst->wavefront();
11154 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11155 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11156 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11157
11158 src0.readSrc();
11159 src1.read();
11160
11161 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11162 if (wf->execMask(lane)) {
11163 vcc.setBit(lane, (src0[lane] < src1[lane]
11164 || src0[lane] > src1[lane]) ? 1 : 0);
11165 }
11166 }
11167
11168 vcc.write();
11169 wf->execMask() = vcc.rawData();
11170 }
11171
11172 Inst_VOPC__V_CMPX_GE_F32::Inst_VOPC__V_CMPX_GE_F32(InFmt_VOPC *iFmt)
11173 : Inst_VOPC(iFmt, "v_cmpx_ge_f32")
11174 {
11175 setFlag(ALU);
11176 setFlag(F32);
11177 } // Inst_VOPC__V_CMPX_GE_F32
11178
11179 Inst_VOPC__V_CMPX_GE_F32::~Inst_VOPC__V_CMPX_GE_F32()
11180 {
11181 } // ~Inst_VOPC__V_CMPX_GE_F32
11182
11183 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
11184 void
11185 Inst_VOPC__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
11186 {
11187 Wavefront *wf = gpuDynInst->wavefront();
11188 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11189 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11190 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11191
11192 src0.readSrc();
11193 src1.read();
11194
11195 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11196 if (wf->execMask(lane)) {
11197 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
11198 }
11199 }
11200
11201 vcc.write();
11202 wf->execMask() = vcc.rawData();
11203 }
11204
11205 Inst_VOPC__V_CMPX_O_F32::Inst_VOPC__V_CMPX_O_F32(InFmt_VOPC *iFmt)
11206 : Inst_VOPC(iFmt, "v_cmpx_o_f32")
11207 {
11208 setFlag(ALU);
11209 setFlag(F32);
11210 } // Inst_VOPC__V_CMPX_O_F32
11211
11212 Inst_VOPC__V_CMPX_O_F32::~Inst_VOPC__V_CMPX_O_F32()
11213 {
11214 } // ~Inst_VOPC__V_CMPX_O_F32
11215
11216 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
11217 // encoding.
11218 void
11219 Inst_VOPC__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
11220 {
11221 Wavefront *wf = gpuDynInst->wavefront();
11222 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11223 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11224 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11225
11226 src0.readSrc();
11227 src1.read();
11228
11229 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11230 if (wf->execMask(lane)) {
11231 vcc.setBit(lane, (!std::isnan(src0[lane])
11232 && !std::isnan(src1[lane])) ? 1 : 0);
11233 }
11234 }
11235
11236 vcc.write();
11237 wf->execMask() = vcc.rawData();
11238 }
11239
11240 Inst_VOPC__V_CMPX_U_F32::Inst_VOPC__V_CMPX_U_F32(InFmt_VOPC *iFmt)
11241 : Inst_VOPC(iFmt, "v_cmpx_u_f32")
11242 {
11243 setFlag(ALU);
11244 setFlag(F32);
11245 } // Inst_VOPC__V_CMPX_U_F32
11246
11247 Inst_VOPC__V_CMPX_U_F32::~Inst_VOPC__V_CMPX_U_F32()
11248 {
11249 } // ~Inst_VOPC__V_CMPX_U_F32
11250
11251 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
11252 // encoding.
11253 void
11254 Inst_VOPC__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
11255 {
11256 Wavefront *wf = gpuDynInst->wavefront();
11257 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11258 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11259 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11260
11261 src0.readSrc();
11262 src1.read();
11263
11264 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11265 if (wf->execMask(lane)) {
11266 vcc.setBit(lane, (std::isnan(src0[lane])
11267 || std::isnan(src1[lane])) ? 1 : 0);
11268 }
11269 }
11270
11271 vcc.write();
11272 wf->execMask() = vcc.rawData();
11273 }
11274
11275 Inst_VOPC__V_CMPX_NGE_F32::Inst_VOPC__V_CMPX_NGE_F32(InFmt_VOPC *iFmt)
11276 : Inst_VOPC(iFmt, "v_cmpx_nge_f32")
11277 {
11278 setFlag(ALU);
11279 setFlag(F32);
11280 } // Inst_VOPC__V_CMPX_NGE_F32
11281
11282 Inst_VOPC__V_CMPX_NGE_F32::~Inst_VOPC__V_CMPX_NGE_F32()
11283 {
11284 } // ~Inst_VOPC__V_CMPX_NGE_F32
11285
11286 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
11287 void
11288 Inst_VOPC__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
11289 {
11290 Wavefront *wf = gpuDynInst->wavefront();
11291 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11292 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11293 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11294
11295 src0.readSrc();
11296 src1.read();
11297
11298 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11299 if (wf->execMask(lane)) {
11300 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
11301 }
11302 }
11303
11304 vcc.write();
11305 wf->execMask() = vcc.rawData();
11306 }
11307
11308 Inst_VOPC__V_CMPX_NLG_F32::Inst_VOPC__V_CMPX_NLG_F32(InFmt_VOPC *iFmt)
11309 : Inst_VOPC(iFmt, "v_cmpx_nlg_f32")
11310 {
11311 setFlag(ALU);
11312 setFlag(F32);
11313 } // Inst_VOPC__V_CMPX_NLG_F32
11314
11315 Inst_VOPC__V_CMPX_NLG_F32::~Inst_VOPC__V_CMPX_NLG_F32()
11316 {
11317 } // ~Inst_VOPC__V_CMPX_NLG_F32
11318
11319 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
11320 void
11321 Inst_VOPC__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
11322 {
11323 Wavefront *wf = gpuDynInst->wavefront();
11324 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11325 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11326 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11327
11328 src0.readSrc();
11329 src1.read();
11330
11331 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11332 if (wf->execMask(lane)) {
11333 vcc.setBit(lane, !(src0[lane] < src1[lane]
11334 || src0[lane] > src1[lane]) ? 1 : 0);
11335 }
11336 }
11337
11338 vcc.write();
11339 wf->execMask() = vcc.rawData();
11340 }
11341
11342 Inst_VOPC__V_CMPX_NGT_F32::Inst_VOPC__V_CMPX_NGT_F32(InFmt_VOPC *iFmt)
11343 : Inst_VOPC(iFmt, "v_cmpx_ngt_f32")
11344 {
11345 setFlag(ALU);
11346 setFlag(F32);
11347 } // Inst_VOPC__V_CMPX_NGT_F32
11348
11349 Inst_VOPC__V_CMPX_NGT_F32::~Inst_VOPC__V_CMPX_NGT_F32()
11350 {
11351 } // ~Inst_VOPC__V_CMPX_NGT_F32
11352
11353 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
11354 void
11355 Inst_VOPC__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
11356 {
11357 Wavefront *wf = gpuDynInst->wavefront();
11358 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11359 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11360 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11361
11362 src0.readSrc();
11363 src1.read();
11364
11365 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11366 if (wf->execMask(lane)) {
11367 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
11368 }
11369 }
11370
11371 vcc.write();
11372 wf->execMask() = vcc.rawData();
11373 }
11374
11375 Inst_VOPC__V_CMPX_NLE_F32::Inst_VOPC__V_CMPX_NLE_F32(InFmt_VOPC *iFmt)
11376 : Inst_VOPC(iFmt, "v_cmpx_nle_f32")
11377 {
11378 setFlag(ALU);
11379 setFlag(F32);
11380 } // Inst_VOPC__V_CMPX_NLE_F32
11381
11382 Inst_VOPC__V_CMPX_NLE_F32::~Inst_VOPC__V_CMPX_NLE_F32()
11383 {
11384 } // ~Inst_VOPC__V_CMPX_NLE_F32
11385
11386 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
11387 void
11388 Inst_VOPC__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
11389 {
11390 Wavefront *wf = gpuDynInst->wavefront();
11391 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11392 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11394
11395 src0.readSrc();
11396 src1.read();
11397
11398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11399 if (wf->execMask(lane)) {
11400 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
11401 }
11402 }
11403
11404 vcc.write();
11405 wf->execMask() = vcc.rawData();
11406 }
11407
11408 Inst_VOPC__V_CMPX_NEQ_F32::Inst_VOPC__V_CMPX_NEQ_F32(InFmt_VOPC *iFmt)
11409 : Inst_VOPC(iFmt, "v_cmpx_neq_f32")
11410 {
11411 setFlag(ALU);
11412 setFlag(F32);
11413 } // Inst_VOPC__V_CMPX_NEQ_F32
11414
11415 Inst_VOPC__V_CMPX_NEQ_F32::~Inst_VOPC__V_CMPX_NEQ_F32()
11416 {
11417 } // ~Inst_VOPC__V_CMPX_NEQ_F32
11418
11419 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
11420 void
11421 Inst_VOPC__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
11422 {
11423 Wavefront *wf = gpuDynInst->wavefront();
11424 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11425 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11426 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11427
11428 src0.readSrc();
11429 src1.read();
11430
11431 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11432 if (wf->execMask(lane)) {
11433 vcc.setBit(lane, !(src0[lane] == src1[lane]) ? 1 : 0);
11434 }
11435 }
11436
11437              vcc.write();
                   wf->execMask() = vcc.rawData();
11438          }
11439
11440 Inst_VOPC__V_CMPX_NLT_F32::Inst_VOPC__V_CMPX_NLT_F32(InFmt_VOPC *iFmt)
11441 : Inst_VOPC(iFmt, "v_cmpx_nlt_f32")
11442 {
11443 setFlag(ALU);
11444 setFlag(F32);
11445 } // Inst_VOPC__V_CMPX_NLT_F32
11446
11447 Inst_VOPC__V_CMPX_NLT_F32::~Inst_VOPC__V_CMPX_NLT_F32()
11448 {
11449 } // ~Inst_VOPC__V_CMPX_NLT_F32
11450
11451 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
11452 void
11453 Inst_VOPC__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
11454 {
11455 Wavefront *wf = gpuDynInst->wavefront();
11456 ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
11457 ConstVecOperandF32 src1(gpuDynInst, instData.VSRC1);
11458 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11459
11460 src0.readSrc();
11461 src1.read();
11462
11463 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11464 if (wf->execMask(lane)) {
11465 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
11466 }
11467 }
11468
11469 vcc.write();
11470 wf->execMask() = vcc.rawData();
11471 }
11472
11473 Inst_VOPC__V_CMPX_TRU_F32::Inst_VOPC__V_CMPX_TRU_F32(InFmt_VOPC *iFmt)
11474 : Inst_VOPC(iFmt, "v_cmpx_tru_f32")
11475 {
11476 setFlag(ALU);
11477 setFlag(F32);
11478 } // Inst_VOPC__V_CMPX_TRU_F32
11479
11480 Inst_VOPC__V_CMPX_TRU_F32::~Inst_VOPC__V_CMPX_TRU_F32()
11481 {
11482 } // ~Inst_VOPC__V_CMPX_TRU_F32
11483
11484 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
11485 void
11486 Inst_VOPC__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
11487 {
11488 Wavefront *wf = gpuDynInst->wavefront();
11489 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11490
11491 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11492 if (wf->execMask(lane)) {
11493 vcc.setBit(lane, 1);
11494 }
11495 }
11496
11497 vcc.write();
11498 wf->execMask() = vcc.rawData();
11499 }
11500
11501 Inst_VOPC__V_CMP_F_F64::Inst_VOPC__V_CMP_F_F64(InFmt_VOPC *iFmt)
11502 : Inst_VOPC(iFmt, "v_cmp_f_f64")
11503 {
11504 setFlag(ALU);
11505 setFlag(F64);
11506 } // Inst_VOPC__V_CMP_F_F64
11507
11508 Inst_VOPC__V_CMP_F_F64::~Inst_VOPC__V_CMP_F_F64()
11509 {
11510 } // ~Inst_VOPC__V_CMP_F_F64
11511
11512 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
11513 void
11514 Inst_VOPC__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
11515 {
11516 Wavefront *wf = gpuDynInst->wavefront();
11517 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11518
11519 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11520 if (wf->execMask(lane)) {
11521 vcc.setBit(lane, 0);
11522 }
11523 }
11524
11525 vcc.write();
11526 }
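        // The f64 compares repeat the f32 control flow; only the operand
        // type changes to ConstVecOperandF64, which reads each lane's
        // 64-bit value across a pair of 32-bit VGPRs.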
11527
11528 Inst_VOPC__V_CMP_LT_F64::Inst_VOPC__V_CMP_LT_F64(InFmt_VOPC *iFmt)
11529 : Inst_VOPC(iFmt, "v_cmp_lt_f64")
11530 {
11531 setFlag(ALU);
11532 setFlag(F64);
11533 } // Inst_VOPC__V_CMP_LT_F64
11534
11535 Inst_VOPC__V_CMP_LT_F64::~Inst_VOPC__V_CMP_LT_F64()
11536 {
11537 } // ~Inst_VOPC__V_CMP_LT_F64
11538
11539 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
11540 void
11541 Inst_VOPC__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
11542 {
11543 Wavefront *wf = gpuDynInst->wavefront();
11544 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11545 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11546 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11547
11548 src0.readSrc();
11549 src1.read();
11550
11551 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11552 if (wf->execMask(lane)) {
11553 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
11554 }
11555 }
11556
11557 vcc.write();
11558 }
11559
11560 Inst_VOPC__V_CMP_EQ_F64::Inst_VOPC__V_CMP_EQ_F64(InFmt_VOPC *iFmt)
11561 : Inst_VOPC(iFmt, "v_cmp_eq_f64")
11562 {
11563 setFlag(ALU);
11564 setFlag(F64);
11565 } // Inst_VOPC__V_CMP_EQ_F64
11566
11567 Inst_VOPC__V_CMP_EQ_F64::~Inst_VOPC__V_CMP_EQ_F64()
11568 {
11569 } // ~Inst_VOPC__V_CMP_EQ_F64
11570
11571 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
11572 void
11573 Inst_VOPC__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
11574 {
11575 Wavefront *wf = gpuDynInst->wavefront();
11576 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11577 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11578 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11579
11580 src0.readSrc();
11581 src1.read();
11582
11583 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11584 if (wf->execMask(lane)) {
11585 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
11586 }
11587 }
11588
11589 vcc.write();
11590 }
11591
11592 Inst_VOPC__V_CMP_LE_F64::Inst_VOPC__V_CMP_LE_F64(InFmt_VOPC *iFmt)
11593 : Inst_VOPC(iFmt, "v_cmp_le_f64")
11594 {
11595 setFlag(ALU);
11596 setFlag(F64);
11597 } // Inst_VOPC__V_CMP_LE_F64
11598
11599 Inst_VOPC__V_CMP_LE_F64::~Inst_VOPC__V_CMP_LE_F64()
11600 {
11601 } // ~Inst_VOPC__V_CMP_LE_F64
11602
11603 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
11604 void
11605 Inst_VOPC__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
11606 {
11607 Wavefront *wf = gpuDynInst->wavefront();
11608 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11609 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11610 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11611
11612 src0.readSrc();
11613 src1.read();
11614
11615 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11616 if (wf->execMask(lane)) {
11617 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
11618 }
11619 }
11620
11621 vcc.write();
11622 }
11623
11624 Inst_VOPC__V_CMP_GT_F64::Inst_VOPC__V_CMP_GT_F64(InFmt_VOPC *iFmt)
11625 : Inst_VOPC(iFmt, "v_cmp_gt_f64")
11626 {
11627 setFlag(ALU);
11628 setFlag(F64);
11629 } // Inst_VOPC__V_CMP_GT_F64
11630
11631 Inst_VOPC__V_CMP_GT_F64::~Inst_VOPC__V_CMP_GT_F64()
11632 {
11633 } // ~Inst_VOPC__V_CMP_GT_F64
11634
11635 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
11636 void
11637 Inst_VOPC__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
11638 {
11639 Wavefront *wf = gpuDynInst->wavefront();
11640 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11641 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11642 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11643
11644 src0.readSrc();
11645 src1.read();
11646
11647 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11648 if (wf->execMask(lane)) {
11649 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
11650 }
11651 }
11652
11653 vcc.write();
11654 }
11655
11656 Inst_VOPC__V_CMP_LG_F64::Inst_VOPC__V_CMP_LG_F64(InFmt_VOPC *iFmt)
11657 : Inst_VOPC(iFmt, "v_cmp_lg_f64")
11658 {
11659 setFlag(ALU);
11660 setFlag(F64);
11661 } // Inst_VOPC__V_CMP_LG_F64
11662
11663 Inst_VOPC__V_CMP_LG_F64::~Inst_VOPC__V_CMP_LG_F64()
11664 {
11665 } // ~Inst_VOPC__V_CMP_LG_F64
11666
11667 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
11668 void
11669 Inst_VOPC__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
11670 {
11671 Wavefront *wf = gpuDynInst->wavefront();
11672 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11673 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11674 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11675
11676 src0.readSrc();
11677 src1.read();
11678
11679 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11680 if (wf->execMask(lane)) {
11681 vcc.setBit(lane, (src0[lane] < src1[lane]
11682 || src0[lane] > src1[lane]) ? 1 : 0);
11683 }
11684 }
11685
11686 vcc.write();
11687 }
11688
11689 Inst_VOPC__V_CMP_GE_F64::Inst_VOPC__V_CMP_GE_F64(InFmt_VOPC *iFmt)
11690 : Inst_VOPC(iFmt, "v_cmp_ge_f64")
11691 {
11692 setFlag(ALU);
11693 setFlag(F64);
11694 } // Inst_VOPC__V_CMP_GE_F64
11695
11696 Inst_VOPC__V_CMP_GE_F64::~Inst_VOPC__V_CMP_GE_F64()
11697 {
11698 } // ~Inst_VOPC__V_CMP_GE_F64
11699
11700 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
11701 void
11702 Inst_VOPC__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
11703 {
11704 Wavefront *wf = gpuDynInst->wavefront();
11705 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11706 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11707 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11708
11709 src0.readSrc();
11710 src1.read();
11711
11712 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11713 if (wf->execMask(lane)) {
11714 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
11715 }
11716 }
11717
11718 vcc.write();
11719 }
11720
11721 Inst_VOPC__V_CMP_O_F64::Inst_VOPC__V_CMP_O_F64(InFmt_VOPC *iFmt)
11722 : Inst_VOPC(iFmt, "v_cmp_o_f64")
11723 {
11724 setFlag(ALU);
11725 setFlag(F64);
11726 } // Inst_VOPC__V_CMP_O_F64
11727
11728 Inst_VOPC__V_CMP_O_F64::~Inst_VOPC__V_CMP_O_F64()
11729 {
11730 } // ~Inst_VOPC__V_CMP_O_F64
11731
11732 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
11733 void
11734 Inst_VOPC__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
11735 {
11736 Wavefront *wf = gpuDynInst->wavefront();
11737 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11738 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11739 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11740
11741 src0.readSrc();
11742 src1.read();
11743
11744 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11745 if (wf->execMask(lane)) {
11746 vcc.setBit(lane, (!std::isnan(src0[lane])
11747 && !std::isnan(src1[lane])) ? 1 : 0);
11748 }
11749 }
11750
11751 vcc.write();
11752 }
11753
11754 Inst_VOPC__V_CMP_U_F64::Inst_VOPC__V_CMP_U_F64(InFmt_VOPC *iFmt)
11755 : Inst_VOPC(iFmt, "v_cmp_u_f64")
11756 {
11757 setFlag(ALU);
11758 setFlag(F64);
11759 } // Inst_VOPC__V_CMP_U_F64
11760
11761 Inst_VOPC__V_CMP_U_F64::~Inst_VOPC__V_CMP_U_F64()
11762 {
11763 } // ~Inst_VOPC__V_CMP_U_F64
11764
11765 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
11766 void
11767 Inst_VOPC__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
11768 {
11769 Wavefront *wf = gpuDynInst->wavefront();
11770 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11771 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11772 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11773
11774 src0.readSrc();
11775 src1.read();
11776
11777 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11778 if (wf->execMask(lane)) {
11779 vcc.setBit(lane, (std::isnan(src0[lane])
11780 || std::isnan(src1[lane])) ? 1 : 0);
11781 }
11782 }
11783
11784 vcc.write();
11785 }
11786
11787 Inst_VOPC__V_CMP_NGE_F64::Inst_VOPC__V_CMP_NGE_F64(InFmt_VOPC *iFmt)
11788 : Inst_VOPC(iFmt, "v_cmp_nge_f64")
11789 {
11790 setFlag(ALU);
11791 setFlag(F64);
11792 } // Inst_VOPC__V_CMP_NGE_F64
11793
11794 Inst_VOPC__V_CMP_NGE_F64::~Inst_VOPC__V_CMP_NGE_F64()
11795 {
11796 } // ~Inst_VOPC__V_CMP_NGE_F64
11797
11798 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
11799 void
11800 Inst_VOPC__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
11801 {
11802 Wavefront *wf = gpuDynInst->wavefront();
11803 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11804 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11805 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11806
11807 src0.readSrc();
11808 src1.read();
11809
11810 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11811 if (wf->execMask(lane)) {
11812 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
11813 }
11814 }
11815
11816 vcc.write();
11817 }
11818
11819 Inst_VOPC__V_CMP_NLG_F64::Inst_VOPC__V_CMP_NLG_F64(InFmt_VOPC *iFmt)
11820 : Inst_VOPC(iFmt, "v_cmp_nlg_f64")
11821 {
11822 setFlag(ALU);
11823 setFlag(F64);
11824 } // Inst_VOPC__V_CMP_NLG_F64
11825
11826 Inst_VOPC__V_CMP_NLG_F64::~Inst_VOPC__V_CMP_NLG_F64()
11827 {
11828 } // ~Inst_VOPC__V_CMP_NLG_F64
11829
11830 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
11831 void
11832 Inst_VOPC__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
11833 {
11834 Wavefront *wf = gpuDynInst->wavefront();
11835 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11836 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11837 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11838
11839 src0.readSrc();
11840 src1.read();
11841
11842 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11843 if (wf->execMask(lane)) {
11844 vcc.setBit(lane, !(src0[lane] < src1[lane]
11845 || src0[lane] > src1[lane]) ? 1 : 0);
11846 }
11847 }
11848
11849 vcc.write();
11850 }
11851
11852 Inst_VOPC__V_CMP_NGT_F64::Inst_VOPC__V_CMP_NGT_F64(InFmt_VOPC *iFmt)
11853 : Inst_VOPC(iFmt, "v_cmp_ngt_f64")
11854 {
11855 setFlag(ALU);
11856 setFlag(F64);
11857 } // Inst_VOPC__V_CMP_NGT_F64
11858
11859 Inst_VOPC__V_CMP_NGT_F64::~Inst_VOPC__V_CMP_NGT_F64()
11860 {
11861 } // ~Inst_VOPC__V_CMP_NGT_F64
11862
11863 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
11864 void
11865 Inst_VOPC__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
11866 {
11867 Wavefront *wf = gpuDynInst->wavefront();
11868 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11869 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11870 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11871
11872 src0.readSrc();
11873 src1.read();
11874
11875 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11876 if (wf->execMask(lane)) {
11877 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
11878 }
11879 }
11880
11881 vcc.write();
11882 }
11883
11884 Inst_VOPC__V_CMP_NLE_F64::Inst_VOPC__V_CMP_NLE_F64(InFmt_VOPC *iFmt)
11885 : Inst_VOPC(iFmt, "v_cmp_nle_f64")
11886 {
11887 setFlag(ALU);
11888 setFlag(F64);
11889 } // Inst_VOPC__V_CMP_NLE_F64
11890
11891 Inst_VOPC__V_CMP_NLE_F64::~Inst_VOPC__V_CMP_NLE_F64()
11892 {
11893 } // ~Inst_VOPC__V_CMP_NLE_F64
11894
11895 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
11896 void
11897 Inst_VOPC__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
11898 {
11899 Wavefront *wf = gpuDynInst->wavefront();
11900 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11901 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11902 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11903
11904 src0.readSrc();
11905 src1.read();
11906
11907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11908 if (wf->execMask(lane)) {
11909 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
11910 }
11911 }
11912
11913 vcc.write();
11914 }
11915
11916 Inst_VOPC__V_CMP_NEQ_F64::Inst_VOPC__V_CMP_NEQ_F64(InFmt_VOPC *iFmt)
11917 : Inst_VOPC(iFmt, "v_cmp_neq_f64")
11918 {
11919 setFlag(ALU);
11920 setFlag(F64);
11921 } // Inst_VOPC__V_CMP_NEQ_F64
11922
11923 Inst_VOPC__V_CMP_NEQ_F64::~Inst_VOPC__V_CMP_NEQ_F64()
11924 {
11925 } // ~Inst_VOPC__V_CMP_NEQ_F64
11926
11927 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
11928 void
11929 Inst_VOPC__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
11930 {
11931 Wavefront *wf = gpuDynInst->wavefront();
11932 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11933 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11934 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11935
11936 src0.readSrc();
11937 src1.read();
11938
11939 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11940 if (wf->execMask(lane)) {
11941 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
11942 }
11943 }
11944
11945 vcc.write();
11946 }
11947
11948 Inst_VOPC__V_CMP_NLT_F64::Inst_VOPC__V_CMP_NLT_F64(InFmt_VOPC *iFmt)
11949 : Inst_VOPC(iFmt, "v_cmp_nlt_f64")
11950 {
11951 setFlag(ALU);
11952 setFlag(F64);
11953 } // Inst_VOPC__V_CMP_NLT_F64
11954
11955 Inst_VOPC__V_CMP_NLT_F64::~Inst_VOPC__V_CMP_NLT_F64()
11956 {
11957 } // ~Inst_VOPC__V_CMP_NLT_F64
11958
11959 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
11960 void
11961 Inst_VOPC__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
11962 {
11963 Wavefront *wf = gpuDynInst->wavefront();
11964 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
11965 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
11966 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11967
11968 src0.readSrc();
11969 src1.read();
11970
11971 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11972 if (wf->execMask(lane)) {
11973 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
11974 }
11975 }
11976
11977 vcc.write();
11978 }
11979
11980 Inst_VOPC__V_CMP_TRU_F64::Inst_VOPC__V_CMP_TRU_F64(InFmt_VOPC *iFmt)
11981 : Inst_VOPC(iFmt, "v_cmp_tru_f64")
11982 {
11983 setFlag(ALU);
11984 setFlag(F64);
11985 } // Inst_VOPC__V_CMP_TRU_F64
11986
11987 Inst_VOPC__V_CMP_TRU_F64::~Inst_VOPC__V_CMP_TRU_F64()
11988 {
11989 } // ~Inst_VOPC__V_CMP_TRU_F64
11990
11991 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
11992 void
11993 Inst_VOPC__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
11994 {
11995 Wavefront *wf = gpuDynInst->wavefront();
11996 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
11997
11998 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
11999 if (wf->execMask(lane)) {
12000 vcc.setBit(lane, 1);
12001 }
12002 }
12003
12004 vcc.write();
12005 }
12006
12007 Inst_VOPC__V_CMPX_F_F64::Inst_VOPC__V_CMPX_F_F64(InFmt_VOPC *iFmt)
12008 : Inst_VOPC(iFmt, "v_cmpx_f_f64")
12009 {
12010 setFlag(ALU);
12011 setFlag(F64);
12012 } // Inst_VOPC__V_CMPX_F_F64
12013
12014 Inst_VOPC__V_CMPX_F_F64::~Inst_VOPC__V_CMPX_F_F64()
12015 {
12016 } // ~Inst_VOPC__V_CMPX_F_F64
12017
12018 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
12019 void
12020 Inst_VOPC__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
12021 {
12022 Wavefront *wf = gpuDynInst->wavefront();
12023 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12024
12025 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12026 if (wf->execMask(lane)) {
12027 vcc.setBit(lane, 0);
12028 }
12029 }
12030
12031 vcc.write();
12032 wf->execMask() = vcc.rawData();
12033 }
12034
12035 Inst_VOPC__V_CMPX_LT_F64::Inst_VOPC__V_CMPX_LT_F64(InFmt_VOPC *iFmt)
12036 : Inst_VOPC(iFmt, "v_cmpx_lt_f64")
12037 {
12038 setFlag(ALU);
12039 setFlag(F64);
12040 } // Inst_VOPC__V_CMPX_LT_F64
12041
12042 Inst_VOPC__V_CMPX_LT_F64::~Inst_VOPC__V_CMPX_LT_F64()
12043 {
12044 } // ~Inst_VOPC__V_CMPX_LT_F64
12045
12046 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
12047 void
12048 Inst_VOPC__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
12049 {
12050 Wavefront *wf = gpuDynInst->wavefront();
12051 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12052 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12053 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12054
12055 src0.readSrc();
12056 src1.read();
12057
12058 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12059 if (wf->execMask(lane)) {
12060 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
12061 }
12062 }
12063
12064 wf->execMask() = vcc.rawData();
12065 vcc.write();
12066 }
12067
12068 Inst_VOPC__V_CMPX_EQ_F64::Inst_VOPC__V_CMPX_EQ_F64(InFmt_VOPC *iFmt)
12069 : Inst_VOPC(iFmt, "v_cmpx_eq_f64")
12070 {
12071 setFlag(ALU);
12072 setFlag(F64);
12073 } // Inst_VOPC__V_CMPX_EQ_F64
12074
12075 Inst_VOPC__V_CMPX_EQ_F64::~Inst_VOPC__V_CMPX_EQ_F64()
12076 {
12077 } // ~Inst_VOPC__V_CMPX_EQ_F64
12078
12079 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
12080 void
12081 Inst_VOPC__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
12082 {
12083 Wavefront *wf = gpuDynInst->wavefront();
12084 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12085 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12086 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12087
12088 src0.readSrc();
12089 src1.read();
12090
12091 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12092 if (wf->execMask(lane)) {
12093 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
12094 }
12095 }
12096
12097 wf->execMask() = vcc.rawData();
12098 vcc.write();
12099 }
12100
12101 Inst_VOPC__V_CMPX_LE_F64::Inst_VOPC__V_CMPX_LE_F64(InFmt_VOPC *iFmt)
12102 : Inst_VOPC(iFmt, "v_cmpx_le_f64")
12103 {
12104 setFlag(ALU);
12105 setFlag(F64);
12106 } // Inst_VOPC__V_CMPX_LE_F64
12107
12108 Inst_VOPC__V_CMPX_LE_F64::~Inst_VOPC__V_CMPX_LE_F64()
12109 {
12110 } // ~Inst_VOPC__V_CMPX_LE_F64
12111
12112 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
12113 void
12114 Inst_VOPC__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
12115 {
12116 Wavefront *wf = gpuDynInst->wavefront();
12117 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12118 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12119 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12120
12121 src0.readSrc();
12122 src1.read();
12123
12124 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12125 if (wf->execMask(lane)) {
12126 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
12127 }
12128 }
12129
12130 wf->execMask() = vcc.rawData();
12131 vcc.write();
12132 }
12133
12134 Inst_VOPC__V_CMPX_GT_F64::Inst_VOPC__V_CMPX_GT_F64(InFmt_VOPC *iFmt)
12135 : Inst_VOPC(iFmt, "v_cmpx_gt_f64")
12136 {
12137 setFlag(ALU);
12138 setFlag(F64);
12139 } // Inst_VOPC__V_CMPX_GT_F64
12140
12141 Inst_VOPC__V_CMPX_GT_F64::~Inst_VOPC__V_CMPX_GT_F64()
12142 {
12143 } // ~Inst_VOPC__V_CMPX_GT_F64
12144
12145 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
12146 void
12147 Inst_VOPC__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
12148 {
12149 Wavefront *wf = gpuDynInst->wavefront();
12150 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12151 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12152 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12153
12154 src0.readSrc();
12155 src1.read();
12156
12157 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12158 if (wf->execMask(lane)) {
12159 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
12160 }
12161 }
12162
12163 wf->execMask() = vcc.rawData();
12164 vcc.write();
12165 }
12166
12167 Inst_VOPC__V_CMPX_LG_F64::Inst_VOPC__V_CMPX_LG_F64(InFmt_VOPC *iFmt)
12168 : Inst_VOPC(iFmt, "v_cmpx_lg_f64")
12169 {
12170 setFlag(ALU);
12171 setFlag(F64);
12172 } // Inst_VOPC__V_CMPX_LG_F64
12173
12174 Inst_VOPC__V_CMPX_LG_F64::~Inst_VOPC__V_CMPX_LG_F64()
12175 {
12176 } // ~Inst_VOPC__V_CMPX_LG_F64
12177
12178 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
12179 void
12180 Inst_VOPC__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
12181 {
12182 Wavefront *wf = gpuDynInst->wavefront();
12183 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12184 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12185 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12186
12187 src0.readSrc();
12188 src1.read();
12189
12190 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12191 if (wf->execMask(lane)) {
12192 vcc.setBit(lane, (src0[lane] < src1[lane]
12193 || src0[lane] > src1[lane]) ? 1 : 0);
12194 }
12195 }
12196
12197 wf->execMask() = vcc.rawData();
12198 vcc.write();
12199 }
12200
12201 Inst_VOPC__V_CMPX_GE_F64::Inst_VOPC__V_CMPX_GE_F64(InFmt_VOPC *iFmt)
12202 : Inst_VOPC(iFmt, "v_cmpx_ge_f64")
12203 {
12204 setFlag(ALU);
12205 setFlag(F64);
12206 } // Inst_VOPC__V_CMPX_GE_F64
12207
12208 Inst_VOPC__V_CMPX_GE_F64::~Inst_VOPC__V_CMPX_GE_F64()
12209 {
12210 } // ~Inst_VOPC__V_CMPX_GE_F64
12211
12212 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
12213 void
12214 Inst_VOPC__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
12215 {
12216 Wavefront *wf = gpuDynInst->wavefront();
12217 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12218 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12219 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12220
12221 src0.readSrc();
12222 src1.read();
12223
12224 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12225 if (wf->execMask(lane)) {
12226 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
12227 }
12228 }
12229
12230 wf->execMask() = vcc.rawData();
12231 vcc.write();
12232 }
12233
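// v_cmpx_o (ordered) is true only when neither source is a NaN;
// v_cmpx_u (unordered), below, is its complement and is true when
// either source is a NaN.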
12234 Inst_VOPC__V_CMPX_O_F64::Inst_VOPC__V_CMPX_O_F64(InFmt_VOPC *iFmt)
12235 : Inst_VOPC(iFmt, "v_cmpx_o_f64")
12236 {
12237 setFlag(ALU);
12238 setFlag(F64);
12239 } // Inst_VOPC__V_CMPX_O_F64
12240
12241 Inst_VOPC__V_CMPX_O_F64::~Inst_VOPC__V_CMPX_O_F64()
12242 {
12243 } // ~Inst_VOPC__V_CMPX_O_F64
12244
12245 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
12246 // encoding.
12247 void
12248 Inst_VOPC__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
12249 {
12250 Wavefront *wf = gpuDynInst->wavefront();
12251 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12252 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12253 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12254
12255 src0.readSrc();
12256 src1.read();
12257
12258 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12259 if (wf->execMask(lane)) {
12260 vcc.setBit(lane, (!std::isnan(src0[lane])
12261 && !std::isnan(src1[lane])) ? 1 : 0);
12262 }
12263 }
12264
12265 wf->execMask() = vcc.rawData();
12266 vcc.write();
12267 }
12268
12269 Inst_VOPC__V_CMPX_U_F64::Inst_VOPC__V_CMPX_U_F64(InFmt_VOPC *iFmt)
12270 : Inst_VOPC(iFmt, "v_cmpx_u_f64")
12271 {
12272 setFlag(ALU);
12273 setFlag(F64);
12274 } // Inst_VOPC__V_CMPX_U_F64
12275
12276 Inst_VOPC__V_CMPX_U_F64::~Inst_VOPC__V_CMPX_U_F64()
12277 {
12278 } // ~Inst_VOPC__V_CMPX_U_F64
12279
12280 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
12281 // encoding.
12282 void
12283 Inst_VOPC__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
12284 {
12285 Wavefront *wf = gpuDynInst->wavefront();
12286 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12287 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12288 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12289
12290 src0.readSrc();
12291 src1.read();
12292
12293 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12294 if (wf->execMask(lane)) {
12295 vcc.setBit(lane, (std::isnan(src0[lane])
12296 || std::isnan(src1[lane])) ? 1 : 0);
12297 }
12298 }
12299
12300 wf->execMask() = vcc.rawData();
12301 vcc.write();
12302 }
12303
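// The negated float compares below (nge, nlg, ngt, nle, neq, nlt) all
// evaluate true when either source is a NaN: e.g. !(S0 >= S1) holds for
// unordered sources, whereas (S0 < S1) does not.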
12304 Inst_VOPC__V_CMPX_NGE_F64::Inst_VOPC__V_CMPX_NGE_F64(InFmt_VOPC *iFmt)
12305 : Inst_VOPC(iFmt, "v_cmpx_nge_f64")
12306 {
12307 setFlag(ALU);
12308 setFlag(F64);
12309 } // Inst_VOPC__V_CMPX_NGE_F64
12310
12311 Inst_VOPC__V_CMPX_NGE_F64::~Inst_VOPC__V_CMPX_NGE_F64()
12312 {
12313 } // ~Inst_VOPC__V_CMPX_NGE_F64
12314
12315 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
12316 void
12317 Inst_VOPC__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
12318 {
12319 Wavefront *wf = gpuDynInst->wavefront();
12320 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12321 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12322 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12323
12324 src0.readSrc();
12325 src1.read();
12326
12327 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12328 if (wf->execMask(lane)) {
12329 vcc.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
12330 }
12331 }
12332
12333 wf->execMask() = vcc.rawData();
12334 vcc.write();
12335 }
12336
12337 Inst_VOPC__V_CMPX_NLG_F64::Inst_VOPC__V_CMPX_NLG_F64(InFmt_VOPC *iFmt)
12338 : Inst_VOPC(iFmt, "v_cmpx_nlg_f64")
12339 {
12340 setFlag(ALU);
12341 setFlag(F64);
12342 } // Inst_VOPC__V_CMPX_NLG_F64
12343
12344 Inst_VOPC__V_CMPX_NLG_F64::~Inst_VOPC__V_CMPX_NLG_F64()
12345 {
12346 } // ~Inst_VOPC__V_CMPX_NLG_F64
12347
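// !(S0 < S1 || S0 > S1) holds when the sources compare equal or when
// either source is a NaN, i.e. the unordered "not less or greater"
// predicate.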
12348 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
12349 void
12350 Inst_VOPC__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
12351 {
12352 Wavefront *wf = gpuDynInst->wavefront();
12353 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12354 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12355 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12356
12357 src0.readSrc();
12358 src1.read();
12359
12360 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12361 if (wf->execMask(lane)) {
12362 vcc.setBit(lane, !(src0[lane] < src1[lane]
12363 || src0[lane] > src1[lane]) ? 1 : 0);
12364 }
12365 }
12366
12367 wf->execMask() = vcc.rawData();
12368 vcc.write();
12369 }
12370
12371 Inst_VOPC__V_CMPX_NGT_F64::Inst_VOPC__V_CMPX_NGT_F64(InFmt_VOPC *iFmt)
12372 : Inst_VOPC(iFmt, "v_cmpx_ngt_f64")
12373 {
12374 setFlag(ALU);
12375 setFlag(F64);
12376 } // Inst_VOPC__V_CMPX_NGT_F64
12377
12378 Inst_VOPC__V_CMPX_NGT_F64::~Inst_VOPC__V_CMPX_NGT_F64()
12379 {
12380 } // ~Inst_VOPC__V_CMPX_NGT_F64
12381
12382 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
12383 void
12384 Inst_VOPC__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
12385 {
12386 Wavefront *wf = gpuDynInst->wavefront();
12387 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12388 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12389 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12390
12391 src0.readSrc();
12392 src1.read();
12393
12394 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12395 if (wf->execMask(lane)) {
12396 vcc.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
12397 }
12398 }
12399
12400 wf->execMask() = vcc.rawData();
12401 vcc.write();
12402 }
12403
12404 Inst_VOPC__V_CMPX_NLE_F64::Inst_VOPC__V_CMPX_NLE_F64(InFmt_VOPC *iFmt)
12405 : Inst_VOPC(iFmt, "v_cmpx_nle_f64")
12406 {
12407 setFlag(ALU);
12408 setFlag(F64);
12409 } // Inst_VOPC__V_CMPX_NLE_F64
12410
12411 Inst_VOPC__V_CMPX_NLE_F64::~Inst_VOPC__V_CMPX_NLE_F64()
12412 {
12413 } // ~Inst_VOPC__V_CMPX_NLE_F64
12414
12415 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
12416 void
12417 Inst_VOPC__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
12418 {
12419 Wavefront *wf = gpuDynInst->wavefront();
12420 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12421 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12422 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12423
12424 src0.readSrc();
12425 src1.read();
12426
12427 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12428 if (wf->execMask(lane)) {
12429 vcc.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
12430 }
12431 }
12432
12433 wf->execMask() = vcc.rawData();
12434 vcc.write();
12435 }
12436
12437 Inst_VOPC__V_CMPX_NEQ_F64::Inst_VOPC__V_CMPX_NEQ_F64(InFmt_VOPC *iFmt)
12438 : Inst_VOPC(iFmt, "v_cmpx_neq_f64")
12439 {
12440 setFlag(ALU);
12441 setFlag(F64);
12442 } // Inst_VOPC__V_CMPX_NEQ_F64
12443
12444 Inst_VOPC__V_CMPX_NEQ_F64::~Inst_VOPC__V_CMPX_NEQ_F64()
12445 {
12446 } // ~Inst_VOPC__V_CMPX_NEQ_F64
12447
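// C++'s operator!= on doubles is already an unordered compare (it
// yields true when either operand is a NaN), so it implements the neq
// predicate directly.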
12448 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
12449 void
12450 Inst_VOPC__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
12451 {
12452 Wavefront *wf = gpuDynInst->wavefront();
12453 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12454 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12455 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12456
12457 src0.readSrc();
12458 src1.read();
12459
12460 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12461 if (wf->execMask(lane)) {
12462 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
12463 }
12464 }
12465
12466 wf->execMask() = vcc.rawData();
12467 vcc.write();
12468 }
12469
12470 Inst_VOPC__V_CMPX_NLT_F64::Inst_VOPC__V_CMPX_NLT_F64(InFmt_VOPC *iFmt)
12471 : Inst_VOPC(iFmt, "v_cmpx_nlt_f64")
12472 {
12473 setFlag(ALU);
12474 setFlag(F64);
12475 } // Inst_VOPC__V_CMPX_NLT_F64
12476
12477 Inst_VOPC__V_CMPX_NLT_F64::~Inst_VOPC__V_CMPX_NLT_F64()
12478 {
12479 } // ~Inst_VOPC__V_CMPX_NLT_F64
12480
12481 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
12482 void
12483 Inst_VOPC__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
12484 {
12485 Wavefront *wf = gpuDynInst->wavefront();
12486 ConstVecOperandF64 src0(gpuDynInst, instData.SRC0);
12487 ConstVecOperandF64 src1(gpuDynInst, instData.VSRC1);
12488 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12489
12490 src0.readSrc();
12491 src1.read();
12492
12493 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12494 if (wf->execMask(lane)) {
12495 vcc.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
12496 }
12497 }
12498
12499 wf->execMask() = vcc.rawData();
12500 vcc.write();
12501 }
12502
12503 Inst_VOPC__V_CMPX_TRU_F64::Inst_VOPC__V_CMPX_TRU_F64(InFmt_VOPC *iFmt)
12504 : Inst_VOPC(iFmt, "v_cmpx_tru_f64")
12505 {
12506 setFlag(ALU);
12507 setFlag(F64);
12508 } // Inst_VOPC__V_CMPX_TRU_F64
12509
12510 Inst_VOPC__V_CMPX_TRU_F64::~Inst_VOPC__V_CMPX_TRU_F64()
12511 {
12512 } // ~Inst_VOPC__V_CMPX_TRU_F64
12513
12514 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
12515 void
12516 Inst_VOPC__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
12517 {
12518 Wavefront *wf = gpuDynInst->wavefront();
12519 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12520
12521 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12522 if (wf->execMask(lane)) {
12523 vcc.setBit(lane, 1);
12524 }
12525 }
12526
12527 wf->execMask() = vcc.rawData();
12528 vcc.write();
12529 }
12530
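// The integer compares below come in signed (i16/i32) and unsigned
// (u16/u32) flavors; the f and t (tru) opcodes are constant false/true
// predicates that simply clear or set the active lanes' result bits.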
12531 Inst_VOPC__V_CMP_F_I16::Inst_VOPC__V_CMP_F_I16(InFmt_VOPC *iFmt)
12532 : Inst_VOPC(iFmt, "v_cmp_f_i16")
12533 {
12534 setFlag(ALU);
12535 } // Inst_VOPC__V_CMP_F_I16
12536
12537 Inst_VOPC__V_CMP_F_I16::~Inst_VOPC__V_CMP_F_I16()
12538 {
12539 } // ~Inst_VOPC__V_CMP_F_I16
12540
12541 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
12542 void
12543 Inst_VOPC__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
12544 {
12545 Wavefront *wf = gpuDynInst->wavefront();
12546 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12547
12548 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12549 if (wf->execMask(lane)) {
12550 vcc.setBit(lane, 0);
12551 }
12552 }
12553
12554 vcc.write();
12555 }
12556
12557 Inst_VOPC__V_CMP_LT_I16::Inst_VOPC__V_CMP_LT_I16(InFmt_VOPC *iFmt)
12558 : Inst_VOPC(iFmt, "v_cmp_lt_i16")
12559 {
12560 setFlag(ALU);
12561 } // Inst_VOPC__V_CMP_LT_I16
12562
12563 Inst_VOPC__V_CMP_LT_I16::~Inst_VOPC__V_CMP_LT_I16()
12564 {
12565 } // ~Inst_VOPC__V_CMP_LT_I16
12566
12567 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
12568 void
12569 Inst_VOPC__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
12570 {
12571 Wavefront *wf = gpuDynInst->wavefront();
12572 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12573 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12574 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12575
12576 src0.readSrc();
12577 src1.read();
12578
12579 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12580 if (wf->execMask(lane)) {
12581 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
12582 }
12583 }
12584
12585 vcc.write();
12586 }
12587
12588 Inst_VOPC__V_CMP_EQ_I16::Inst_VOPC__V_CMP_EQ_I16(InFmt_VOPC *iFmt)
12589 : Inst_VOPC(iFmt, "v_cmp_eq_i16")
12590 {
12591 setFlag(ALU);
12592 } // Inst_VOPC__V_CMP_EQ_I16
12593
12594 Inst_VOPC__V_CMP_EQ_I16::~Inst_VOPC__V_CMP_EQ_I16()
12595 {
12596 } // ~Inst_VOPC__V_CMP_EQ_I16
12597
12598 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
12599 void
12600 Inst_VOPC__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
12601 {
12602 Wavefront *wf = gpuDynInst->wavefront();
12603 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12604 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12605 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12606
12607 src0.readSrc();
12608 src1.read();
12609
12610 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12611 if (wf->execMask(lane)) {
12612 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
12613 }
12614 }
12615
12616 vcc.write();
12617 }
12618
12619 Inst_VOPC__V_CMP_LE_I16::Inst_VOPC__V_CMP_LE_I16(InFmt_VOPC *iFmt)
12620 : Inst_VOPC(iFmt, "v_cmp_le_i16")
12621 {
12622 setFlag(ALU);
12623 } // Inst_VOPC__V_CMP_LE_I16
12624
12625 Inst_VOPC__V_CMP_LE_I16::~Inst_VOPC__V_CMP_LE_I16()
12626 {
12627 } // ~Inst_VOPC__V_CMP_LE_I16
12628
12629 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
12630 void
12631 Inst_VOPC__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
12632 {
12633 Wavefront *wf = gpuDynInst->wavefront();
12634 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12635 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12636 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12637
12638 src0.readSrc();
12639 src1.read();
12640
12641 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12642 if (wf->execMask(lane)) {
12643 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
12644 }
12645 }
12646
12647 vcc.write();
12648 }
12649
12650 Inst_VOPC__V_CMP_GT_I16::Inst_VOPC__V_CMP_GT_I16(InFmt_VOPC *iFmt)
12651 : Inst_VOPC(iFmt, "v_cmp_gt_i16")
12652 {
12653 setFlag(ALU);
12654 } // Inst_VOPC__V_CMP_GT_I16
12655
12656 Inst_VOPC__V_CMP_GT_I16::~Inst_VOPC__V_CMP_GT_I16()
12657 {
12658 } // ~Inst_VOPC__V_CMP_GT_I16
12659
12660 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
12661 void
12662 Inst_VOPC__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
12663 {
12664 Wavefront *wf = gpuDynInst->wavefront();
12665 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12666 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12667 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12668
12669 src0.readSrc();
12670 src1.read();
12671
12672 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12673 if (wf->execMask(lane)) {
12674 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
12675 }
12676 }
12677
12678 vcc.write();
12679 }
12680
12681 Inst_VOPC__V_CMP_NE_I16::Inst_VOPC__V_CMP_NE_I16(InFmt_VOPC *iFmt)
12682 : Inst_VOPC(iFmt, "v_cmp_ne_i16")
12683 {
12684 setFlag(ALU);
12685 } // Inst_VOPC__V_CMP_NE_I16
12686
12687 Inst_VOPC__V_CMP_NE_I16::~Inst_VOPC__V_CMP_NE_I16()
12688 {
12689 } // ~Inst_VOPC__V_CMP_NE_I16
12690
12691 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
12692 void
12693 Inst_VOPC__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
12694 {
12695 Wavefront *wf = gpuDynInst->wavefront();
12696 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12697 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12698 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12699
12700 src0.readSrc();
12701 src1.read();
12702
12703 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12704 if (wf->execMask(lane)) {
12705 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
12706 }
12707 }
12708
12709 vcc.write();
12710 }
12711
12712 Inst_VOPC__V_CMP_GE_I16::Inst_VOPC__V_CMP_GE_I16(InFmt_VOPC *iFmt)
12713 : Inst_VOPC(iFmt, "v_cmp_ge_i16")
12714 {
12715 setFlag(ALU);
12716 } // Inst_VOPC__V_CMP_GE_I16
12717
12718 Inst_VOPC__V_CMP_GE_I16::~Inst_VOPC__V_CMP_GE_I16()
12719 {
12720 } // ~Inst_VOPC__V_CMP_GE_I16
12721
12722 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
12723 void
12724 Inst_VOPC__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
12725 {
12726 Wavefront *wf = gpuDynInst->wavefront();
12727 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
12728 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
12729 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12730
12731 src0.readSrc();
12732 src1.read();
12733
12734 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12735 if (wf->execMask(lane)) {
12736 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
12737 }
12738 }
12739
12740 vcc.write();
12741 }
12742
12743 Inst_VOPC__V_CMP_T_I16::Inst_VOPC__V_CMP_T_I16(InFmt_VOPC *iFmt)
12744 : Inst_VOPC(iFmt, "v_cmp_t_i16")
12745 {
12746 setFlag(ALU);
12747 } // Inst_VOPC__V_CMP_T_I16
12748
12749 Inst_VOPC__V_CMP_T_I16::~Inst_VOPC__V_CMP_T_I16()
12750 {
12751 } // ~Inst_VOPC__V_CMP_T_I16
12752
12753 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
12754 void
12755 Inst_VOPC__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
12756 {
12757 Wavefront *wf = gpuDynInst->wavefront();
12758 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12759
12760 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12761 if (wf->execMask(lane)) {
12762 vcc.setBit(lane, 1);
12763 }
12764 }
12765
12766 vcc.write();
12767 }
12768
12769 Inst_VOPC__V_CMP_F_U16::Inst_VOPC__V_CMP_F_U16(InFmt_VOPC *iFmt)
12770 : Inst_VOPC(iFmt, "v_cmp_f_u16")
12771 {
12772 setFlag(ALU);
12773 } // Inst_VOPC__V_CMP_F_U16
12774
12775 Inst_VOPC__V_CMP_F_U16::~Inst_VOPC__V_CMP_F_U16()
12776 {
12777 } // ~Inst_VOPC__V_CMP_F_U16
12778
12779 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
12780 void
12781 Inst_VOPC__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
12782 {
12783 Wavefront *wf = gpuDynInst->wavefront();
12784 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12785
12786 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12787 if (wf->execMask(lane)) {
12788 vcc.setBit(lane, 0);
12789 }
12790 }
12791
12792 vcc.write();
12793 }
12794
12795 Inst_VOPC__V_CMP_LT_U16::Inst_VOPC__V_CMP_LT_U16(InFmt_VOPC *iFmt)
12796 : Inst_VOPC(iFmt, "v_cmp_lt_u16")
12797 {
12798 setFlag(ALU);
12799 } // Inst_VOPC__V_CMP_LT_U16
12800
12801 Inst_VOPC__V_CMP_LT_U16::~Inst_VOPC__V_CMP_LT_U16()
12802 {
12803 } // ~Inst_VOPC__V_CMP_LT_U16
12804
12805 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
12806 void
12807 Inst_VOPC__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
12808 {
12809 Wavefront *wf = gpuDynInst->wavefront();
12810 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12811 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12812 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12813
12814 src0.readSrc();
12815 src1.read();
12816
12817 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12818 if (wf->execMask(lane)) {
12819 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
12820 }
12821 }
12822
12823 vcc.write();
12824 }
12825
12826 Inst_VOPC__V_CMP_EQ_U16::Inst_VOPC__V_CMP_EQ_U16(InFmt_VOPC *iFmt)
12827 : Inst_VOPC(iFmt, "v_cmp_eq_u16")
12828 {
12829 setFlag(ALU);
12830 } // Inst_VOPC__V_CMP_EQ_U16
12831
12832 Inst_VOPC__V_CMP_EQ_U16::~Inst_VOPC__V_CMP_EQ_U16()
12833 {
12834 } // ~Inst_VOPC__V_CMP_EQ_U16
12835
12836 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
12837 void
12838 Inst_VOPC__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
12839 {
12840 Wavefront *wf = gpuDynInst->wavefront();
12841 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12842 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12843 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12844
12845 src0.readSrc();
12846 src1.read();
12847
12848 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12849 if (wf->execMask(lane)) {
12850 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
12851 }
12852 }
12853
12854 vcc.write();
12855 }
12856
12857 Inst_VOPC__V_CMP_LE_U16::Inst_VOPC__V_CMP_LE_U16(InFmt_VOPC *iFmt)
12858 : Inst_VOPC(iFmt, "v_cmp_le_u16")
12859 {
12860 setFlag(ALU);
12861 } // Inst_VOPC__V_CMP_LE_U16
12862
12863 Inst_VOPC__V_CMP_LE_U16::~Inst_VOPC__V_CMP_LE_U16()
12864 {
12865 } // ~Inst_VOPC__V_CMP_LE_U16
12866
12867 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
12868 void
12869 Inst_VOPC__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
12870 {
12871 Wavefront *wf = gpuDynInst->wavefront();
12872 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12873 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12874 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12875
12876 src0.readSrc();
12877 src1.read();
12878
12879 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12880 if (wf->execMask(lane)) {
12881 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
12882 }
12883 }
12884
12885 vcc.write();
12886 }
12887
12888 Inst_VOPC__V_CMP_GT_U16::Inst_VOPC__V_CMP_GT_U16(InFmt_VOPC *iFmt)
12889 : Inst_VOPC(iFmt, "v_cmp_gt_u16")
12890 {
12891 setFlag(ALU);
12892 } // Inst_VOPC__V_CMP_GT_U16
12893
12894 Inst_VOPC__V_CMP_GT_U16::~Inst_VOPC__V_CMP_GT_U16()
12895 {
12896 } // ~Inst_VOPC__V_CMP_GT_U16
12897
12898 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
12899 void
12900 Inst_VOPC__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
12901 {
12902 Wavefront *wf = gpuDynInst->wavefront();
12903 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12904 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12905 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12906
12907 src0.readSrc();
12908 src1.read();
12909
12910 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12911 if (wf->execMask(lane)) {
12912 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
12913 }
12914 }
12915
12916 vcc.write();
12917 }
12918
12919 Inst_VOPC__V_CMP_NE_U16::Inst_VOPC__V_CMP_NE_U16(InFmt_VOPC *iFmt)
12920 : Inst_VOPC(iFmt, "v_cmp_ne_u16")
12921 {
12922 setFlag(ALU);
12923 } // Inst_VOPC__V_CMP_NE_U16
12924
12925 Inst_VOPC__V_CMP_NE_U16::~Inst_VOPC__V_CMP_NE_U16()
12926 {
12927 } // ~Inst_VOPC__V_CMP_NE_U16
12928
12929 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
12930 void
12931 Inst_VOPC__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
12932 {
12933 Wavefront *wf = gpuDynInst->wavefront();
12934 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12935 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12936 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12937
12938 src0.readSrc();
12939 src1.read();
12940
12941 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12942 if (wf->execMask(lane)) {
12943 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
12944 }
12945 }
12946
12947 vcc.write();
12948 }
12949
12950 Inst_VOPC__V_CMP_GE_U16::Inst_VOPC__V_CMP_GE_U16(InFmt_VOPC *iFmt)
12951 : Inst_VOPC(iFmt, "v_cmp_ge_u16")
12952 {
12953 setFlag(ALU);
12954 } // Inst_VOPC__V_CMP_GE_U16
12955
12956 Inst_VOPC__V_CMP_GE_U16::~Inst_VOPC__V_CMP_GE_U16()
12957 {
12958 } // ~Inst_VOPC__V_CMP_GE_U16
12959
12960 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
12961 void
12962 Inst_VOPC__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
12963 {
12964 Wavefront *wf = gpuDynInst->wavefront();
12965 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
12966 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
12967 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12968
12969 src0.readSrc();
12970 src1.read();
12971
12972 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12973 if (wf->execMask(lane)) {
12974 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
12975 }
12976 }
12977
12978 vcc.write();
12979 }
12980
12981 Inst_VOPC__V_CMP_T_U16::Inst_VOPC__V_CMP_T_U16(InFmt_VOPC *iFmt)
12982 : Inst_VOPC(iFmt, "v_cmp_t_u16")
12983 {
12984 setFlag(ALU);
12985 } // Inst_VOPC__V_CMP_T_U16
12986
12987 Inst_VOPC__V_CMP_T_U16::~Inst_VOPC__V_CMP_T_U16()
12988 {
12989 } // ~Inst_VOPC__V_CMP_T_U16
12990
12991 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
12992 void
12993 Inst_VOPC__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
12994 {
12995 Wavefront *wf = gpuDynInst->wavefront();
12996 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
12997
12998 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
12999 if (wf->execMask(lane)) {
13000 vcc.setBit(lane, 1);
13001 }
13002 }
13003
13004 vcc.write();
13005 }
13006
13007 Inst_VOPC__V_CMPX_F_I16::Inst_VOPC__V_CMPX_F_I16(InFmt_VOPC *iFmt)
13008 : Inst_VOPC(iFmt, "v_cmpx_f_i16")
13009 {
13010 setFlag(ALU);
13011 } // Inst_VOPC__V_CMPX_F_I16
13012
13013 Inst_VOPC__V_CMPX_F_I16::~Inst_VOPC__V_CMPX_F_I16()
13014 {
13015 } // ~Inst_VOPC__V_CMPX_F_I16
13016
13017 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
13018 void
13019 Inst_VOPC__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
13020 {
13021 Wavefront *wf = gpuDynInst->wavefront();
13022 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13023
13024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13025 if (wf->execMask(lane)) {
13026 vcc.setBit(lane, 0);
13027 }
13028 }
13029
13030 wf->execMask() = vcc.rawData();
13031 vcc.write();
13032 }
13033
13034 Inst_VOPC__V_CMPX_LT_I16::Inst_VOPC__V_CMPX_LT_I16(InFmt_VOPC *iFmt)
13035 : Inst_VOPC(iFmt, "v_cmpx_lt_i16")
13036 {
13037 setFlag(ALU);
13038 } // Inst_VOPC__V_CMPX_LT_I16
13039
13040 Inst_VOPC__V_CMPX_LT_I16::~Inst_VOPC__V_CMPX_LT_I16()
13041 {
13042 } // ~Inst_VOPC__V_CMPX_LT_I16
13043
13044 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13045 void
13046 Inst_VOPC__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
13047 {
13048 Wavefront *wf = gpuDynInst->wavefront();
13049 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13050 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13051 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13052
13053 src0.readSrc();
13054 src1.read();
13055
13056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13057 if (wf->execMask(lane)) {
13058 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13059 }
13060 }
13061
13062 wf->execMask() = vcc.rawData();
13063 vcc.write();
13064 }
13065
13066 Inst_VOPC__V_CMPX_EQ_I16::Inst_VOPC__V_CMPX_EQ_I16(InFmt_VOPC *iFmt)
13067 : Inst_VOPC(iFmt, "v_cmpx_eq_i16")
13068 {
13069 setFlag(ALU);
13070 } // Inst_VOPC__V_CMPX_EQ_I16
13071
13072 Inst_VOPC__V_CMPX_EQ_I16::~Inst_VOPC__V_CMPX_EQ_I16()
13073 {
13074 } // ~Inst_VOPC__V_CMPX_EQ_I16
13075
13076 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13077 void
13078 Inst_VOPC__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
13079 {
13080 Wavefront *wf = gpuDynInst->wavefront();
13081 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13082 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13083 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13084
13085 src0.readSrc();
13086 src1.read();
13087
13088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13089 if (wf->execMask(lane)) {
13090 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13091 }
13092 }
13093
13094 wf->execMask() = vcc.rawData();
13095 vcc.write();
13096 }
13097
13098 Inst_VOPC__V_CMPX_LE_I16::Inst_VOPC__V_CMPX_LE_I16(InFmt_VOPC *iFmt)
13099 : Inst_VOPC(iFmt, "v_cmpx_le_i16")
13100 {
13101 setFlag(ALU);
13102 } // Inst_VOPC__V_CMPX_LE_I16
13103
13104 Inst_VOPC__V_CMPX_LE_I16::~Inst_VOPC__V_CMPX_LE_I16()
13105 {
13106 } // ~Inst_VOPC__V_CMPX_LE_I16
13107
13108 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13109 void
13110 Inst_VOPC__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
13111 {
13112 Wavefront *wf = gpuDynInst->wavefront();
13113 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13114 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13115 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13116
13117 src0.readSrc();
13118 src1.read();
13119
13120 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13121 if (wf->execMask(lane)) {
13122 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13123 }
13124 }
13125
13126 wf->execMask() = vcc.rawData();
13127 vcc.write();
13128 }
13129
13130 Inst_VOPC__V_CMPX_GT_I16::Inst_VOPC__V_CMPX_GT_I16(InFmt_VOPC *iFmt)
13131 : Inst_VOPC(iFmt, "v_cmpx_gt_i16")
13132 {
13133 setFlag(ALU);
13134 } // Inst_VOPC__V_CMPX_GT_I16
13135
13136 Inst_VOPC__V_CMPX_GT_I16::~Inst_VOPC__V_CMPX_GT_I16()
13137 {
13138 } // ~Inst_VOPC__V_CMPX_GT_I16
13139
13140 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13141 void
13142 Inst_VOPC__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
13143 {
13144 Wavefront *wf = gpuDynInst->wavefront();
13145 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13146 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13147 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13148
13149 src0.readSrc();
13150 src1.read();
13151
13152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13153 if (wf->execMask(lane)) {
13154 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13155 }
13156 }
13157
13158 wf->execMask() = vcc.rawData();
13159 vcc.write();
13160 }
13161
13162 Inst_VOPC__V_CMPX_NE_I16::Inst_VOPC__V_CMPX_NE_I16(InFmt_VOPC *iFmt)
13163 : Inst_VOPC(iFmt, "v_cmpx_ne_i16")
13164 {
13165 setFlag(ALU);
13166 } // Inst_VOPC__V_CMPX_NE_I16
13167
13168 Inst_VOPC__V_CMPX_NE_I16::~Inst_VOPC__V_CMPX_NE_I16()
13169 {
13170 } // ~Inst_VOPC__V_CMPX_NE_I16
13171
13172 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13173 void
13174 Inst_VOPC__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
13175 {
13176 Wavefront *wf = gpuDynInst->wavefront();
13177 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13178 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13179 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13180
13181 src0.readSrc();
13182 src1.read();
13183
13184 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13185 if (wf->execMask(lane)) {
13186 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13187 }
13188 }
13189
13190 wf->execMask() = vcc.rawData();
13191 vcc.write();
13192 }
13193
13194 Inst_VOPC__V_CMPX_GE_I16::Inst_VOPC__V_CMPX_GE_I16(InFmt_VOPC *iFmt)
13195 : Inst_VOPC(iFmt, "v_cmpx_ge_i16")
13196 {
13197 setFlag(ALU);
13198 } // Inst_VOPC__V_CMPX_GE_I16
13199
13200 Inst_VOPC__V_CMPX_GE_I16::~Inst_VOPC__V_CMPX_GE_I16()
13201 {
13202 } // ~Inst_VOPC__V_CMPX_GE_I16
13203
13204 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13205 void
13206 Inst_VOPC__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
13207 {
13208 Wavefront *wf = gpuDynInst->wavefront();
13209 ConstVecOperandI16 src0(gpuDynInst, instData.SRC0);
13210 ConstVecOperandI16 src1(gpuDynInst, instData.VSRC1);
13211 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13212
13213 src0.readSrc();
13214 src1.read();
13215
13216 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13217 if (wf->execMask(lane)) {
13218 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13219 }
13220 }
13221
13222 wf->execMask() = vcc.rawData();
13223 vcc.write();
13224 }
13225
13226 Inst_VOPC__V_CMPX_T_I16::Inst_VOPC__V_CMPX_T_I16(InFmt_VOPC *iFmt)
13227 : Inst_VOPC(iFmt, "v_cmpx_t_i16")
13228 {
13229 setFlag(ALU);
13230 } // Inst_VOPC__V_CMPX_T_I16
13231
13232 Inst_VOPC__V_CMPX_T_I16::~Inst_VOPC__V_CMPX_T_I16()
13233 {
13234 } // ~Inst_VOPC__V_CMPX_T_I16
13235
13236 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
13237 void
13238 Inst_VOPC__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
13239 {
13240 Wavefront *wf = gpuDynInst->wavefront();
13241 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13242
13243 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13244 if (wf->execMask(lane)) {
13245 vcc.setBit(lane, 1);
13246 }
13247 }
13248
13249 wf->execMask() = vcc.rawData();
13250 vcc.write();
13251 }
13252
13253 Inst_VOPC__V_CMPX_F_U16::Inst_VOPC__V_CMPX_F_U16(InFmt_VOPC *iFmt)
13254 : Inst_VOPC(iFmt, "v_cmpx_f_u16")
13255 {
13256 setFlag(ALU);
13257 } // Inst_VOPC__V_CMPX_F_U16
13258
13259 Inst_VOPC__V_CMPX_F_U16::~Inst_VOPC__V_CMPX_F_U16()
13260 {
13261 } // ~Inst_VOPC__V_CMPX_F_U16
13262
13263 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
13264 void
13265 Inst_VOPC__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
13266 {
13267 Wavefront *wf = gpuDynInst->wavefront();
13268 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13269
13270 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13271 if (wf->execMask(lane)) {
13272 vcc.setBit(lane, 0);
13273 }
13274 }
13275
13276 wf->execMask() = vcc.rawData();
13277 vcc.write();
13278 }
13279
13280 Inst_VOPC__V_CMPX_LT_U16::Inst_VOPC__V_CMPX_LT_U16(InFmt_VOPC *iFmt)
13281 : Inst_VOPC(iFmt, "v_cmpx_lt_u16")
13282 {
13283 setFlag(ALU);
13284 } // Inst_VOPC__V_CMPX_LT_U16
13285
13286 Inst_VOPC__V_CMPX_LT_U16::~Inst_VOPC__V_CMPX_LT_U16()
13287 {
13288 } // ~Inst_VOPC__V_CMPX_LT_U16
13289
13290 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13291 void
13292 Inst_VOPC__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
13293 {
13294 Wavefront *wf = gpuDynInst->wavefront();
13295 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13296 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13297 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13298
13299 src0.readSrc();
13300 src1.read();
13301
13302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13303 if (wf->execMask(lane)) {
13304 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13305 }
13306 }
13307
13308 wf->execMask() = vcc.rawData();
13309 vcc.write();
13310 }
13311
13312 Inst_VOPC__V_CMPX_EQ_U16::Inst_VOPC__V_CMPX_EQ_U16(InFmt_VOPC *iFmt)
13313 : Inst_VOPC(iFmt, "v_cmpx_eq_u16")
13314 {
13315 setFlag(ALU);
13316 } // Inst_VOPC__V_CMPX_EQ_U16
13317
13318 Inst_VOPC__V_CMPX_EQ_U16::~Inst_VOPC__V_CMPX_EQ_U16()
13319 {
13320 } // ~Inst_VOPC__V_CMPX_EQ_U16
13321
13322 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13323 void
13324 Inst_VOPC__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
13325 {
13326 Wavefront *wf = gpuDynInst->wavefront();
13327 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13328 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13329 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13330
13331 src0.readSrc();
13332 src1.read();
13333
13334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13335 if (wf->execMask(lane)) {
13336 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13337 }
13338 }
13339
13340 wf->execMask() = vcc.rawData();
13341 vcc.write();
13342 }
13343
13344 Inst_VOPC__V_CMPX_LE_U16::Inst_VOPC__V_CMPX_LE_U16(InFmt_VOPC *iFmt)
13345 : Inst_VOPC(iFmt, "v_cmpx_le_u16")
13346 {
13347 setFlag(ALU);
13348 } // Inst_VOPC__V_CMPX_LE_U16
13349
13350 Inst_VOPC__V_CMPX_LE_U16::~Inst_VOPC__V_CMPX_LE_U16()
13351 {
13352 } // ~Inst_VOPC__V_CMPX_LE_U16
13353
13354 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13355 void
13356 Inst_VOPC__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
13357 {
13358 Wavefront *wf = gpuDynInst->wavefront();
13359 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13360 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13361 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13362
13363 src0.readSrc();
13364 src1.read();
13365
13366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13367 if (wf->execMask(lane)) {
13368 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13369 }
13370 }
13371
13372 wf->execMask() = vcc.rawData();
13373 vcc.write();
13374 }
13375
13376 Inst_VOPC__V_CMPX_GT_U16::Inst_VOPC__V_CMPX_GT_U16(InFmt_VOPC *iFmt)
13377 : Inst_VOPC(iFmt, "v_cmpx_gt_u16")
13378 {
13379 setFlag(ALU);
13380 } // Inst_VOPC__V_CMPX_GT_U16
13381
13382 Inst_VOPC__V_CMPX_GT_U16::~Inst_VOPC__V_CMPX_GT_U16()
13383 {
13384 } // ~Inst_VOPC__V_CMPX_GT_U16
13385
13386 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13387 void
13388 Inst_VOPC__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
13389 {
13390 Wavefront *wf = gpuDynInst->wavefront();
13391 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13392 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13394
13395 src0.readSrc();
13396 src1.read();
13397
13398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13399 if (wf->execMask(lane)) {
13400 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13401 }
13402 }
13403
13404 wf->execMask() = vcc.rawData();
13405 vcc.write();
13406 }
13407
13408 Inst_VOPC__V_CMPX_NE_U16::Inst_VOPC__V_CMPX_NE_U16(InFmt_VOPC *iFmt)
13409 : Inst_VOPC(iFmt, "v_cmpx_ne_u16")
13410 {
13411 setFlag(ALU);
13412 } // Inst_VOPC__V_CMPX_NE_U16
13413
13414 Inst_VOPC__V_CMPX_NE_U16::~Inst_VOPC__V_CMPX_NE_U16()
13415 {
13416 } // ~Inst_VOPC__V_CMPX_NE_U16
13417
13418 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13419 void
13420 Inst_VOPC__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
13421 {
13422 Wavefront *wf = gpuDynInst->wavefront();
13423 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13424 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13425 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13426
13427 src0.readSrc();
13428 src1.read();
13429
13430 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13431 if (wf->execMask(lane)) {
13432 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13433 }
13434 }
13435
13436 wf->execMask() = vcc.rawData();
13437 vcc.write();
13438 }
13439
13440 Inst_VOPC__V_CMPX_GE_U16::Inst_VOPC__V_CMPX_GE_U16(InFmt_VOPC *iFmt)
13441 : Inst_VOPC(iFmt, "v_cmpx_ge_u16")
13442 {
13443 setFlag(ALU);
13444 } // Inst_VOPC__V_CMPX_GE_U16
13445
13446 Inst_VOPC__V_CMPX_GE_U16::~Inst_VOPC__V_CMPX_GE_U16()
13447 {
13448 } // ~Inst_VOPC__V_CMPX_GE_U16
13449
13450 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13451 void
13452 Inst_VOPC__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
13453 {
13454 Wavefront *wf = gpuDynInst->wavefront();
13455 ConstVecOperandU16 src0(gpuDynInst, instData.SRC0);
13456 ConstVecOperandU16 src1(gpuDynInst, instData.VSRC1);
13457 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13458
13459 src0.readSrc();
13460 src1.read();
13461
13462 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13463 if (wf->execMask(lane)) {
13464 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13465 }
13466 }
13467
13468 wf->execMask() = vcc.rawData();
13469 vcc.write();
13470 }
13471
13472 Inst_VOPC__V_CMPX_T_U16::Inst_VOPC__V_CMPX_T_U16(InFmt_VOPC *iFmt)
13473 : Inst_VOPC(iFmt, "v_cmpx_t_u16")
13474 {
13475 setFlag(ALU);
13476 } // Inst_VOPC__V_CMPX_T_U16
13477
13478 Inst_VOPC__V_CMPX_T_U16::~Inst_VOPC__V_CMPX_T_U16()
13479 {
13480 } // ~Inst_VOPC__V_CMPX_T_U16
13481
13482 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
13483 void
13484 Inst_VOPC__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
13485 {
13486 Wavefront *wf = gpuDynInst->wavefront();
13487 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13488
13489 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13490 if (wf->execMask(lane)) {
13491 vcc.setBit(lane, 1);
13492 }
13493 }
13494
13495 wf->execMask() = vcc.rawData();
13496 vcc.write();
13497 }
13498
13499 Inst_VOPC__V_CMP_F_I32::Inst_VOPC__V_CMP_F_I32(InFmt_VOPC *iFmt)
13500 : Inst_VOPC(iFmt, "v_cmp_f_i32")
13501 {
13502 setFlag(ALU);
13503 } // Inst_VOPC__V_CMP_F_I32
13504
13505 Inst_VOPC__V_CMP_F_I32::~Inst_VOPC__V_CMP_F_I32()
13506 {
13507 } // ~Inst_VOPC__V_CMP_F_I32
13508
13509 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
13510 void
13511 Inst_VOPC__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
13512 {
13513 Wavefront *wf = gpuDynInst->wavefront();
13514 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13515
13516 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13517 if (wf->execMask(lane)) {
13518 vcc.setBit(lane, 0);
13519 }
13520 }
13521
13522 vcc.write();
13523 }
13524
13525 Inst_VOPC__V_CMP_LT_I32::Inst_VOPC__V_CMP_LT_I32(InFmt_VOPC *iFmt)
13526 : Inst_VOPC(iFmt, "v_cmp_lt_i32")
13527 {
13528 setFlag(ALU);
13529 } // Inst_VOPC__V_CMP_LT_I32
13530
13531 Inst_VOPC__V_CMP_LT_I32::~Inst_VOPC__V_CMP_LT_I32()
13532 {
13533 } // ~Inst_VOPC__V_CMP_LT_I32
13534
13535 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13536 void
13537 Inst_VOPC__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
13538 {
13539 Wavefront *wf = gpuDynInst->wavefront();
13540 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13541 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13542 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13543
13544 src0.readSrc();
13545 src1.read();
13546
13547 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13548 if (wf->execMask(lane)) {
13549 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13550 }
13551 }
13552
13553 vcc.write();
13554 }
13555
13556 Inst_VOPC__V_CMP_EQ_I32::Inst_VOPC__V_CMP_EQ_I32(InFmt_VOPC *iFmt)
13557 : Inst_VOPC(iFmt, "v_cmp_eq_i32")
13558 {
13559 setFlag(ALU);
13560 } // Inst_VOPC__V_CMP_EQ_I32
13561
13562 Inst_VOPC__V_CMP_EQ_I32::~Inst_VOPC__V_CMP_EQ_I32()
13563 {
13564 } // ~Inst_VOPC__V_CMP_EQ_I32
13565
13566 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13567 void
13568 Inst_VOPC__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
13569 {
13570 Wavefront *wf = gpuDynInst->wavefront();
13571 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13572 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13573 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13574
13575 src0.readSrc();
13576 src1.read();
13577
13578 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13579 if (wf->execMask(lane)) {
13580 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13581 }
13582 }
13583
13584 vcc.write();
13585 }
13586
13587 Inst_VOPC__V_CMP_LE_I32::Inst_VOPC__V_CMP_LE_I32(InFmt_VOPC *iFmt)
13588 : Inst_VOPC(iFmt, "v_cmp_le_i32")
13589 {
13590 setFlag(ALU);
13591 } // Inst_VOPC__V_CMP_LE_I32
13592
13593 Inst_VOPC__V_CMP_LE_I32::~Inst_VOPC__V_CMP_LE_I32()
13594 {
13595 } // ~Inst_VOPC__V_CMP_LE_I32
13596
13597 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13598 void
13599 Inst_VOPC__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
13600 {
13601 Wavefront *wf = gpuDynInst->wavefront();
13602 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13603 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13604 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13605
13606 src0.readSrc();
13607 src1.read();
13608
13609 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13610 if (wf->execMask(lane)) {
13611 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13612 }
13613 }
13614
13615 vcc.write();
13616 }
13617
13618 Inst_VOPC__V_CMP_GT_I32::Inst_VOPC__V_CMP_GT_I32(InFmt_VOPC *iFmt)
13619 : Inst_VOPC(iFmt, "v_cmp_gt_i32")
13620 {
13621 setFlag(ALU);
13622 } // Inst_VOPC__V_CMP_GT_I32
13623
13624 Inst_VOPC__V_CMP_GT_I32::~Inst_VOPC__V_CMP_GT_I32()
13625 {
13626 } // ~Inst_VOPC__V_CMP_GT_I32
13627
13628 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13629 void
13630 Inst_VOPC__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
13631 {
13632 Wavefront *wf = gpuDynInst->wavefront();
13633 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13634 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13635 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13636
13637 src0.readSrc();
13638 src1.read();
13639
13640 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13641 if (wf->execMask(lane)) {
13642 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13643 }
13644 }
13645
13646 vcc.write();
13647 }
13648
13649 Inst_VOPC__V_CMP_NE_I32::Inst_VOPC__V_CMP_NE_I32(InFmt_VOPC *iFmt)
13650 : Inst_VOPC(iFmt, "v_cmp_ne_i32")
13651 {
13652 setFlag(ALU);
13653 } // Inst_VOPC__V_CMP_NE_I32
13654
13655 Inst_VOPC__V_CMP_NE_I32::~Inst_VOPC__V_CMP_NE_I32()
13656 {
13657 } // ~Inst_VOPC__V_CMP_NE_I32
13658
13659 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13660 void
13661 Inst_VOPC__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
13662 {
13663 Wavefront *wf = gpuDynInst->wavefront();
13664 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13665 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13666 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13667
13668 src0.readSrc();
13669 src1.read();
13670
13671 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13672 if (wf->execMask(lane)) {
13673 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13674 }
13675 }
13676
13677 vcc.write();
13678 }
13679
13680 Inst_VOPC__V_CMP_GE_I32::Inst_VOPC__V_CMP_GE_I32(InFmt_VOPC *iFmt)
13681 : Inst_VOPC(iFmt, "v_cmp_ge_i32")
13682 {
13683 setFlag(ALU);
13684 } // Inst_VOPC__V_CMP_GE_I32
13685
13686 Inst_VOPC__V_CMP_GE_I32::~Inst_VOPC__V_CMP_GE_I32()
13687 {
13688 } // ~Inst_VOPC__V_CMP_GE_I32
13689
13690 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13691 void
13692 Inst_VOPC__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
13693 {
13694 Wavefront *wf = gpuDynInst->wavefront();
13695 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
13696 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
13697 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13698
13699 src0.readSrc();
13700 src1.read();
13701
13702 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13703 if (wf->execMask(lane)) {
13704 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13705 }
13706 }
13707
13708 vcc.write();
13709 }
13710
13711 Inst_VOPC__V_CMP_T_I32::Inst_VOPC__V_CMP_T_I32(InFmt_VOPC *iFmt)
13712 : Inst_VOPC(iFmt, "v_cmp_t_i32")
13713 {
13714 setFlag(ALU);
13715 } // Inst_VOPC__V_CMP_T_I32
13716
13717 Inst_VOPC__V_CMP_T_I32::~Inst_VOPC__V_CMP_T_I32()
13718 {
13719 } // ~Inst_VOPC__V_CMP_T_I32
13720
13721 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
13722 void
13723 Inst_VOPC__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
13724 {
13725 Wavefront *wf = gpuDynInst->wavefront();
13726 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13727
13728 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13729 if (wf->execMask(lane)) {
13730 vcc.setBit(lane, 1);
13731 }
13732 }
13733
13734 vcc.write();
13735 }
13736
13737 Inst_VOPC__V_CMP_F_U32::Inst_VOPC__V_CMP_F_U32(InFmt_VOPC *iFmt)
13738 : Inst_VOPC(iFmt, "v_cmp_f_u32")
13739 {
13740 setFlag(ALU);
13741 } // Inst_VOPC__V_CMP_F_U32
13742
13743 Inst_VOPC__V_CMP_F_U32::~Inst_VOPC__V_CMP_F_U32()
13744 {
13745 } // ~Inst_VOPC__V_CMP_F_U32
13746
13747 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
13748 void
13749 Inst_VOPC__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
13750 {
13751 Wavefront *wf = gpuDynInst->wavefront();
13752 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13753
13754 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13755 if (wf->execMask(lane)) {
13756 vcc.setBit(lane, 0);
13757 }
13758 }
13759
13760 vcc.write();
13761 }
13762
13763 Inst_VOPC__V_CMP_LT_U32::Inst_VOPC__V_CMP_LT_U32(InFmt_VOPC *iFmt)
13764 : Inst_VOPC(iFmt, "v_cmp_lt_u32")
13765 {
13766 setFlag(ALU);
13767 } // Inst_VOPC__V_CMP_LT_U32
13768
13769 Inst_VOPC__V_CMP_LT_U32::~Inst_VOPC__V_CMP_LT_U32()
13770 {
13771 } // ~Inst_VOPC__V_CMP_LT_U32
13772
13773 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
13774 void
13775 Inst_VOPC__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
13776 {
13777 Wavefront *wf = gpuDynInst->wavefront();
13778 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13779 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13780 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13781
13782 src0.readSrc();
13783 src1.read();
13784
13785 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13786 if (wf->execMask(lane)) {
13787 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
13788 }
13789 }
13790
13791 vcc.write();
13792 }
13793
13794 Inst_VOPC__V_CMP_EQ_U32::Inst_VOPC__V_CMP_EQ_U32(InFmt_VOPC *iFmt)
13795 : Inst_VOPC(iFmt, "v_cmp_eq_u32")
13796 {
13797 setFlag(ALU);
13798 } // Inst_VOPC__V_CMP_EQ_U32
13799
13800 Inst_VOPC__V_CMP_EQ_U32::~Inst_VOPC__V_CMP_EQ_U32()
13801 {
13802 } // ~Inst_VOPC__V_CMP_EQ_U32
13803
13804 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
13805 void
13806 Inst_VOPC__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
13807 {
13808 Wavefront *wf = gpuDynInst->wavefront();
13809 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13810 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13811 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13812
13813 src0.readSrc();
13814 src1.read();
13815
13816 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13817 if (wf->execMask(lane)) {
13818 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
13819 }
13820 }
13821
13822 vcc.write();
13823 }
13824
13825 Inst_VOPC__V_CMP_LE_U32::Inst_VOPC__V_CMP_LE_U32(InFmt_VOPC *iFmt)
13826 : Inst_VOPC(iFmt, "v_cmp_le_u32")
13827 {
13828 setFlag(ALU);
13829 } // Inst_VOPC__V_CMP_LE_U32
13830
13831 Inst_VOPC__V_CMP_LE_U32::~Inst_VOPC__V_CMP_LE_U32()
13832 {
13833 } // ~Inst_VOPC__V_CMP_LE_U32
13834
13835 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
13836 void
13837 Inst_VOPC__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
13838 {
13839 Wavefront *wf = gpuDynInst->wavefront();
13840 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13841 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13842 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13843
13844 src0.readSrc();
13845 src1.read();
13846
13847 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13848 if (wf->execMask(lane)) {
13849 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
13850 }
13851 }
13852
13853 vcc.write();
13854 }
13855
13856 Inst_VOPC__V_CMP_GT_U32::Inst_VOPC__V_CMP_GT_U32(InFmt_VOPC *iFmt)
13857 : Inst_VOPC(iFmt, "v_cmp_gt_u32")
13858 {
13859 setFlag(ALU);
13860 } // Inst_VOPC__V_CMP_GT_U32
13861
13862 Inst_VOPC__V_CMP_GT_U32::~Inst_VOPC__V_CMP_GT_U32()
13863 {
13864 } // ~Inst_VOPC__V_CMP_GT_U32
13865
13866 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
13867 void
13868 Inst_VOPC__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
13869 {
13870 Wavefront *wf = gpuDynInst->wavefront();
13871 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13872 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13873 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13874
13875 src0.readSrc();
13876 src1.read();
13877
13878 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13879 if (wf->execMask(lane)) {
13880 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
13881 }
13882 }
13883
13884 vcc.write();
13885 }
13886
13887 Inst_VOPC__V_CMP_NE_U32::Inst_VOPC__V_CMP_NE_U32(InFmt_VOPC *iFmt)
13888 : Inst_VOPC(iFmt, "v_cmp_ne_u32")
13889 {
13890 setFlag(ALU);
13891 } // Inst_VOPC__V_CMP_NE_U32
13892
13893 Inst_VOPC__V_CMP_NE_U32::~Inst_VOPC__V_CMP_NE_U32()
13894 {
13895 } // ~Inst_VOPC__V_CMP_NE_U32
13896
13897 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
13898 void
13899 Inst_VOPC__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
13900 {
13901 Wavefront *wf = gpuDynInst->wavefront();
13902 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13903 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13904 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13905
13906 src0.readSrc();
13907 src1.read();
13908
13909 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13910 if (wf->execMask(lane)) {
13911 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
13912 }
13913 }
13914
13915 vcc.write();
13916 }
13917
13918 Inst_VOPC__V_CMP_GE_U32::Inst_VOPC__V_CMP_GE_U32(InFmt_VOPC *iFmt)
13919 : Inst_VOPC(iFmt, "v_cmp_ge_u32")
13920 {
13921 setFlag(ALU);
13922 } // Inst_VOPC__V_CMP_GE_U32
13923
13924 Inst_VOPC__V_CMP_GE_U32::~Inst_VOPC__V_CMP_GE_U32()
13925 {
13926 } // ~Inst_VOPC__V_CMP_GE_U32
13927
13928 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
13929 void
13930 Inst_VOPC__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
13931 {
13932 Wavefront *wf = gpuDynInst->wavefront();
13933 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
13934 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
13935 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13936
13937 src0.readSrc();
13938 src1.read();
13939
13940 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13941 if (wf->execMask(lane)) {
13942 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
13943 }
13944 }
13945
13946 vcc.write();
13947 }
13948
13949 Inst_VOPC__V_CMP_T_U32::Inst_VOPC__V_CMP_T_U32(InFmt_VOPC *iFmt)
13950 : Inst_VOPC(iFmt, "v_cmp_t_u32")
13951 {
13952 setFlag(ALU);
13953 } // Inst_VOPC__V_CMP_T_U32
13954
13955 Inst_VOPC__V_CMP_T_U32::~Inst_VOPC__V_CMP_T_U32()
13956 {
13957 } // ~Inst_VOPC__V_CMP_T_U32
13958
13959 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
13960 void
13961 Inst_VOPC__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
13962 {
13963 Wavefront *wf = gpuDynInst->wavefront();
13964 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13965
13966 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13967 if (wf->execMask(lane)) {
13968 vcc.setBit(lane, 1);
13969 }
13970 }
13971
13972 vcc.write();
13973 }
13974
13975 Inst_VOPC__V_CMPX_F_I32::Inst_VOPC__V_CMPX_F_I32(InFmt_VOPC *iFmt)
13976 : Inst_VOPC(iFmt, "v_cmpx_f_i32")
13977 {
13978 setFlag(ALU);
13979 } // Inst_VOPC__V_CMPX_F_I32
13980
13981 Inst_VOPC__V_CMPX_F_I32::~Inst_VOPC__V_CMPX_F_I32()
13982 {
13983 } // ~Inst_VOPC__V_CMPX_F_I32
13984
13985 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
13986 void
13987 Inst_VOPC__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
13988 {
13989 Wavefront *wf = gpuDynInst->wavefront();
13990 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
13991
13992 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
13993 if (wf->execMask(lane)) {
13994 vcc.setBit(lane, 0);
13995 }
13996 }
13997
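            // the CMPX variants additionally commit the result mask to EXEC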
13998 wf->execMask() = vcc.rawData();
13999 vcc.write();
14000 }
14001
14002 Inst_VOPC__V_CMPX_LT_I32::Inst_VOPC__V_CMPX_LT_I32(InFmt_VOPC *iFmt)
14003 : Inst_VOPC(iFmt, "v_cmpx_lt_i32")
14004 {
14005 setFlag(ALU);
14006 } // Inst_VOPC__V_CMPX_LT_I32
14007
14008 Inst_VOPC__V_CMPX_LT_I32::~Inst_VOPC__V_CMPX_LT_I32()
14009 {
14010 } // ~Inst_VOPC__V_CMPX_LT_I32
14011
14012 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14013 void
14014 Inst_VOPC__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
14015 {
14016 Wavefront *wf = gpuDynInst->wavefront();
14017 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14018 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14019 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14020
14021 src0.readSrc();
14022 src1.read();
14023
14024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14025 if (wf->execMask(lane)) {
14026 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14027 }
14028 }
14029
14030 wf->execMask() = vcc.rawData();
14031 vcc.write();
14032 }
14033
14034 Inst_VOPC__V_CMPX_EQ_I32::Inst_VOPC__V_CMPX_EQ_I32(InFmt_VOPC *iFmt)
14035 : Inst_VOPC(iFmt, "v_cmpx_eq_i32")
14036 {
14037 setFlag(ALU);
14038 } // Inst_VOPC__V_CMPX_EQ_I32
14039
14040 Inst_VOPC__V_CMPX_EQ_I32::~Inst_VOPC__V_CMPX_EQ_I32()
14041 {
14042 } // ~Inst_VOPC__V_CMPX_EQ_I32
14043
14044 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14045 void
14046 Inst_VOPC__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
14047 {
14048 Wavefront *wf = gpuDynInst->wavefront();
14049 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14050 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14051 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14052
14053 src0.readSrc();
14054 src1.read();
14055
14056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14057 if (wf->execMask(lane)) {
14058 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14059 }
14060 }
14061
14062 wf->execMask() = vcc.rawData();
14063 vcc.write();
14064 }
14065
14066 Inst_VOPC__V_CMPX_LE_I32::Inst_VOPC__V_CMPX_LE_I32(InFmt_VOPC *iFmt)
14067 : Inst_VOPC(iFmt, "v_cmpx_le_i32")
14068 {
14069 setFlag(ALU);
14070 } // Inst_VOPC__V_CMPX_LE_I32
14071
14072 Inst_VOPC__V_CMPX_LE_I32::~Inst_VOPC__V_CMPX_LE_I32()
14073 {
14074 } // ~Inst_VOPC__V_CMPX_LE_I32
14075
14076 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14077 void
14078 Inst_VOPC__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
14079 {
14080 Wavefront *wf = gpuDynInst->wavefront();
14081 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14082 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14083 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14084
14085 src0.readSrc();
14086 src1.read();
14087
14088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14089 if (wf->execMask(lane)) {
14090 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14091 }
14092 }
14093
14094 wf->execMask() = vcc.rawData();
14095 vcc.write();
14096 }
14097
14098 Inst_VOPC__V_CMPX_GT_I32::Inst_VOPC__V_CMPX_GT_I32(InFmt_VOPC *iFmt)
14099 : Inst_VOPC(iFmt, "v_cmpx_gt_i32")
14100 {
14101 setFlag(ALU);
14102 } // Inst_VOPC__V_CMPX_GT_I32
14103
14104 Inst_VOPC__V_CMPX_GT_I32::~Inst_VOPC__V_CMPX_GT_I32()
14105 {
14106 } // ~Inst_VOPC__V_CMPX_GT_I32
14107
14108 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14109 void
14110 Inst_VOPC__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
14111 {
14112 Wavefront *wf = gpuDynInst->wavefront();
14113 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14114 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14115 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14116
14117 src0.readSrc();
14118 src1.read();
14119
14120 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14121 if (wf->execMask(lane)) {
14122 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14123 }
14124 }
14125
14126 wf->execMask() = vcc.rawData();
14127 vcc.write();
14128 }
14129
14130 Inst_VOPC__V_CMPX_NE_I32::Inst_VOPC__V_CMPX_NE_I32(InFmt_VOPC *iFmt)
14131 : Inst_VOPC(iFmt, "v_cmpx_ne_i32")
14132 {
14133 setFlag(ALU);
14134 } // Inst_VOPC__V_CMPX_NE_I32
14135
14136 Inst_VOPC__V_CMPX_NE_I32::~Inst_VOPC__V_CMPX_NE_I32()
14137 {
14138 } // ~Inst_VOPC__V_CMPX_NE_I32
14139
14140         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14141 void
14142 Inst_VOPC__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
14143 {
14144 Wavefront *wf = gpuDynInst->wavefront();
14145 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14146 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14147 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14148
14149 src0.readSrc();
14150 src1.read();
14151
14152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14153 if (wf->execMask(lane)) {
14154 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14155 }
14156 }
14157
14158 wf->execMask() = vcc.rawData();
14159 vcc.write();
14160 }
14161
14162 Inst_VOPC__V_CMPX_GE_I32::Inst_VOPC__V_CMPX_GE_I32(InFmt_VOPC *iFmt)
14163 : Inst_VOPC(iFmt, "v_cmpx_ge_i32")
14164 {
14165 setFlag(ALU);
14166 } // Inst_VOPC__V_CMPX_GE_I32
14167
14168 Inst_VOPC__V_CMPX_GE_I32::~Inst_VOPC__V_CMPX_GE_I32()
14169 {
14170 } // ~Inst_VOPC__V_CMPX_GE_I32
14171
14172 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14173 void
14174 Inst_VOPC__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
14175 {
14176 Wavefront *wf = gpuDynInst->wavefront();
14177 ConstVecOperandI32 src0(gpuDynInst, instData.SRC0);
14178 ConstVecOperandI32 src1(gpuDynInst, instData.VSRC1);
14179 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14180
14181 src0.readSrc();
14182 src1.read();
14183
14184 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14185 if (wf->execMask(lane)) {
14186 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14187 }
14188 }
14189
14190 wf->execMask() = vcc.rawData();
14191 vcc.write();
14192 }
14193
14194 Inst_VOPC__V_CMPX_T_I32::Inst_VOPC__V_CMPX_T_I32(InFmt_VOPC *iFmt)
14195 : Inst_VOPC(iFmt, "v_cmpx_t_i32")
14196 {
14197 setFlag(ALU);
14198 } // Inst_VOPC__V_CMPX_T_I32
14199
14200 Inst_VOPC__V_CMPX_T_I32::~Inst_VOPC__V_CMPX_T_I32()
14201 {
14202 } // ~Inst_VOPC__V_CMPX_T_I32
14203
14204 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
14205 void
14206 Inst_VOPC__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
14207 {
14208 Wavefront *wf = gpuDynInst->wavefront();
14209 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14210
14211 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14212 if (wf->execMask(lane)) {
14213 vcc.setBit(lane, 1);
14214 }
14215 }
14216
14217 wf->execMask() = vcc.rawData();
14218 vcc.write();
14219 }
14220
14221 Inst_VOPC__V_CMPX_F_U32::Inst_VOPC__V_CMPX_F_U32(InFmt_VOPC *iFmt)
14222 : Inst_VOPC(iFmt, "v_cmpx_f_u32")
14223 {
14224 setFlag(ALU);
14225 } // Inst_VOPC__V_CMPX_F_U32
14226
14227 Inst_VOPC__V_CMPX_F_U32::~Inst_VOPC__V_CMPX_F_U32()
14228 {
14229 } // ~Inst_VOPC__V_CMPX_F_U32
14230
14231 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
14232 void
14233 Inst_VOPC__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
14234 {
14235 Wavefront *wf = gpuDynInst->wavefront();
14236 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14237
14238 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14239 if (wf->execMask(lane)) {
14240 vcc.setBit(lane, 0);
14241 }
14242 }
14243
14244 wf->execMask() = vcc.rawData();
14245 vcc.write();
14246 }
14247
14248 Inst_VOPC__V_CMPX_LT_U32::Inst_VOPC__V_CMPX_LT_U32(InFmt_VOPC *iFmt)
14249 : Inst_VOPC(iFmt, "v_cmpx_lt_u32")
14250 {
14251 setFlag(ALU);
14252 } // Inst_VOPC__V_CMPX_LT_U32
14253
14254 Inst_VOPC__V_CMPX_LT_U32::~Inst_VOPC__V_CMPX_LT_U32()
14255 {
14256 } // ~Inst_VOPC__V_CMPX_LT_U32
14257
14258 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14259 void
14260 Inst_VOPC__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
14261 {
14262 Wavefront *wf = gpuDynInst->wavefront();
14263 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14264 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14265 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14266
14267 src0.readSrc();
14268 src1.read();
14269
14270 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14271 if (wf->execMask(lane)) {
14272 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14273 }
14274 }
14275
14276 wf->execMask() = vcc.rawData();
14277 vcc.write();
14278 }
14279
14280 Inst_VOPC__V_CMPX_EQ_U32::Inst_VOPC__V_CMPX_EQ_U32(InFmt_VOPC *iFmt)
14281 : Inst_VOPC(iFmt, "v_cmpx_eq_u32")
14282 {
14283 setFlag(ALU);
14284 } // Inst_VOPC__V_CMPX_EQ_U32
14285
14286 Inst_VOPC__V_CMPX_EQ_U32::~Inst_VOPC__V_CMPX_EQ_U32()
14287 {
14288 } // ~Inst_VOPC__V_CMPX_EQ_U32
14289
14290 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14291 void
14292 Inst_VOPC__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
14293 {
14294 Wavefront *wf = gpuDynInst->wavefront();
14295 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14296 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14297 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14298
14299 src0.readSrc();
14300 src1.read();
14301
14302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14303 if (wf->execMask(lane)) {
14304 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14305 }
14306 }
14307
14308 wf->execMask() = vcc.rawData();
14309 vcc.write();
14310 }
14311
14312 Inst_VOPC__V_CMPX_LE_U32::Inst_VOPC__V_CMPX_LE_U32(InFmt_VOPC *iFmt)
14313 : Inst_VOPC(iFmt, "v_cmpx_le_u32")
14314 {
14315 setFlag(ALU);
14316 } // Inst_VOPC__V_CMPX_LE_U32
14317
14318 Inst_VOPC__V_CMPX_LE_U32::~Inst_VOPC__V_CMPX_LE_U32()
14319 {
14320 } // ~Inst_VOPC__V_CMPX_LE_U32
14321
14322 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14323 void
14324 Inst_VOPC__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
14325 {
14326 Wavefront *wf = gpuDynInst->wavefront();
14327 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14328 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14329 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14330
14331 src0.readSrc();
14332 src1.read();
14333
14334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14335 if (wf->execMask(lane)) {
14336 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14337 }
14338 }
14339
14340 wf->execMask() = vcc.rawData();
14341 vcc.write();
14342 }
14343
14344 Inst_VOPC__V_CMPX_GT_U32::Inst_VOPC__V_CMPX_GT_U32(InFmt_VOPC *iFmt)
14345 : Inst_VOPC(iFmt, "v_cmpx_gt_u32")
14346 {
14347 setFlag(ALU);
14348 } // Inst_VOPC__V_CMPX_GT_U32
14349
14350 Inst_VOPC__V_CMPX_GT_U32::~Inst_VOPC__V_CMPX_GT_U32()
14351 {
14352 } // ~Inst_VOPC__V_CMPX_GT_U32
14353
14354 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14355 void
14356 Inst_VOPC__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
14357 {
14358 Wavefront *wf = gpuDynInst->wavefront();
14359 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14360 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14361 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14362
14363 src0.readSrc();
14364 src1.read();
14365
14366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14367 if (wf->execMask(lane)) {
14368 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14369 }
14370 }
14371
14372 wf->execMask() = vcc.rawData();
14373 vcc.write();
14374 }
14375
14376 Inst_VOPC__V_CMPX_NE_U32::Inst_VOPC__V_CMPX_NE_U32(InFmt_VOPC *iFmt)
14377 : Inst_VOPC(iFmt, "v_cmpx_ne_u32")
14378 {
14379 setFlag(ALU);
14380 } // Inst_VOPC__V_CMPX_NE_U32
14381
14382 Inst_VOPC__V_CMPX_NE_U32::~Inst_VOPC__V_CMPX_NE_U32()
14383 {
14384 } // ~Inst_VOPC__V_CMPX_NE_U32
14385
14386         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14387 void
14388 Inst_VOPC__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
14389 {
14390 Wavefront *wf = gpuDynInst->wavefront();
14391 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14392 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14394
14395 src0.readSrc();
14396 src1.read();
14397
14398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14399 if (wf->execMask(lane)) {
14400 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14401 }
14402 }
14403
14404 wf->execMask() = vcc.rawData();
14405 vcc.write();
14406 }
14407
14408 Inst_VOPC__V_CMPX_GE_U32::Inst_VOPC__V_CMPX_GE_U32(InFmt_VOPC *iFmt)
14409 : Inst_VOPC(iFmt, "v_cmpx_ge_u32")
14410 {
14411 setFlag(ALU);
14412 } // Inst_VOPC__V_CMPX_GE_U32
14413
14414 Inst_VOPC__V_CMPX_GE_U32::~Inst_VOPC__V_CMPX_GE_U32()
14415 {
14416 } // ~Inst_VOPC__V_CMPX_GE_U32
14417
14418 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14419 void
14420 Inst_VOPC__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
14421 {
14422 Wavefront *wf = gpuDynInst->wavefront();
14423 ConstVecOperandU32 src0(gpuDynInst, instData.SRC0);
14424 ConstVecOperandU32 src1(gpuDynInst, instData.VSRC1);
14425 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14426
14427 src0.readSrc();
14428 src1.read();
14429
14430 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14431 if (wf->execMask(lane)) {
14432 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14433 }
14434 }
14435
14436 wf->execMask() = vcc.rawData();
14437 vcc.write();
14438 }
14439
14440 Inst_VOPC__V_CMPX_T_U32::Inst_VOPC__V_CMPX_T_U32(InFmt_VOPC *iFmt)
14441 : Inst_VOPC(iFmt, "v_cmpx_t_u32")
14442 {
14443 setFlag(ALU);
14444 } // Inst_VOPC__V_CMPX_T_U32
14445
14446 Inst_VOPC__V_CMPX_T_U32::~Inst_VOPC__V_CMPX_T_U32()
14447 {
14448 } // ~Inst_VOPC__V_CMPX_T_U32
14449
14450 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
14451 void
14452 Inst_VOPC__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
14453 {
14454 Wavefront *wf = gpuDynInst->wavefront();
14455 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14456
14457 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14458 if (wf->execMask(lane)) {
14459 vcc.setBit(lane, 1);
14460 }
14461 }
14462
14463 wf->execMask() = vcc.rawData();
14464 vcc.write();
14465 }
14466
14467 Inst_VOPC__V_CMP_F_I64::Inst_VOPC__V_CMP_F_I64(InFmt_VOPC *iFmt)
14468 : Inst_VOPC(iFmt, "v_cmp_f_i64")
14469 {
14470 setFlag(ALU);
14471 } // Inst_VOPC__V_CMP_F_I64
14472
14473 Inst_VOPC__V_CMP_F_I64::~Inst_VOPC__V_CMP_F_I64()
14474 {
14475 } // ~Inst_VOPC__V_CMP_F_I64
14476
14477 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
14478 void
14479 Inst_VOPC__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
14480 {
14481 Wavefront *wf = gpuDynInst->wavefront();
14482 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14483
14484 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14485 if (wf->execMask(lane)) {
14486 vcc.setBit(lane, 0);
14487 }
14488 }
14489
14490 vcc.write();
14491 }
14492
14493 Inst_VOPC__V_CMP_LT_I64::Inst_VOPC__V_CMP_LT_I64(InFmt_VOPC *iFmt)
14494 : Inst_VOPC(iFmt, "v_cmp_lt_i64")
14495 {
14496 setFlag(ALU);
14497 } // Inst_VOPC__V_CMP_LT_I64
14498
14499 Inst_VOPC__V_CMP_LT_I64::~Inst_VOPC__V_CMP_LT_I64()
14500 {
14501 } // ~Inst_VOPC__V_CMP_LT_I64
14502
14503 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14504 void
14505 Inst_VOPC__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
14506 {
14507 Wavefront *wf = gpuDynInst->wavefront();
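            // 64-bit vector sources occupy a pair of consecutive VGPRs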
14508 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14509 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14510 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14511
14512 src0.readSrc();
14513 src1.read();
14514
14515 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14516 if (wf->execMask(lane)) {
14517 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14518 }
14519 }
14520
14521 vcc.write();
14522 }
14523
14524 Inst_VOPC__V_CMP_EQ_I64::Inst_VOPC__V_CMP_EQ_I64(InFmt_VOPC *iFmt)
14525 : Inst_VOPC(iFmt, "v_cmp_eq_i64")
14526 {
14527 setFlag(ALU);
14528 } // Inst_VOPC__V_CMP_EQ_I64
14529
14530 Inst_VOPC__V_CMP_EQ_I64::~Inst_VOPC__V_CMP_EQ_I64()
14531 {
14532 } // ~Inst_VOPC__V_CMP_EQ_I64
14533
14534 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14535 void
14536 Inst_VOPC__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
14537 {
14538 Wavefront *wf = gpuDynInst->wavefront();
14539 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14540 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14541 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14542
14543 src0.readSrc();
14544 src1.read();
14545
14546 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14547 if (wf->execMask(lane)) {
14548 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14549 }
14550 }
14551
14552 vcc.write();
14553 }
14554
14555 Inst_VOPC__V_CMP_LE_I64::Inst_VOPC__V_CMP_LE_I64(InFmt_VOPC *iFmt)
14556 : Inst_VOPC(iFmt, "v_cmp_le_i64")
14557 {
14558 setFlag(ALU);
14559 } // Inst_VOPC__V_CMP_LE_I64
14560
14561 Inst_VOPC__V_CMP_LE_I64::~Inst_VOPC__V_CMP_LE_I64()
14562 {
14563 } // ~Inst_VOPC__V_CMP_LE_I64
14564
14565 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14566 void
14567 Inst_VOPC__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
14568 {
14569 Wavefront *wf = gpuDynInst->wavefront();
14570 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14571 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14572 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14573
14574 src0.readSrc();
14575 src1.read();
14576
14577 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14578 if (wf->execMask(lane)) {
14579 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14580 }
14581 }
14582
14583 vcc.write();
14584 }
14585
14586 Inst_VOPC__V_CMP_GT_I64::Inst_VOPC__V_CMP_GT_I64(InFmt_VOPC *iFmt)
14587 : Inst_VOPC(iFmt, "v_cmp_gt_i64")
14588 {
14589 setFlag(ALU);
14590 } // Inst_VOPC__V_CMP_GT_I64
14591
14592 Inst_VOPC__V_CMP_GT_I64::~Inst_VOPC__V_CMP_GT_I64()
14593 {
14594 } // ~Inst_VOPC__V_CMP_GT_I64
14595
14596 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14597 void
14598 Inst_VOPC__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
14599 {
14600 Wavefront *wf = gpuDynInst->wavefront();
14601 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14602 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14603 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14604
14605 src0.readSrc();
14606 src1.read();
14607
14608 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14609 if (wf->execMask(lane)) {
14610 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14611 }
14612 }
14613
14614 vcc.write();
14615 }
14616
14617 Inst_VOPC__V_CMP_NE_I64::Inst_VOPC__V_CMP_NE_I64(InFmt_VOPC *iFmt)
14618 : Inst_VOPC(iFmt, "v_cmp_ne_i64")
14619 {
14620 setFlag(ALU);
14621 } // Inst_VOPC__V_CMP_NE_I64
14622
14623 Inst_VOPC__V_CMP_NE_I64::~Inst_VOPC__V_CMP_NE_I64()
14624 {
14625 } // ~Inst_VOPC__V_CMP_NE_I64
14626
14627         // D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14628 void
14629 Inst_VOPC__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
14630 {
14631 Wavefront *wf = gpuDynInst->wavefront();
14632 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14633 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14634 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14635
14636 src0.readSrc();
14637 src1.read();
14638
14639 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14640 if (wf->execMask(lane)) {
14641 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14642 }
14643 }
14644
14645 vcc.write();
14646 }
14647
14648 Inst_VOPC__V_CMP_GE_I64::Inst_VOPC__V_CMP_GE_I64(InFmt_VOPC *iFmt)
14649 : Inst_VOPC(iFmt, "v_cmp_ge_i64")
14650 {
14651 setFlag(ALU);
14652 } // Inst_VOPC__V_CMP_GE_I64
14653
14654 Inst_VOPC__V_CMP_GE_I64::~Inst_VOPC__V_CMP_GE_I64()
14655 {
14656 } // ~Inst_VOPC__V_CMP_GE_I64
14657
14658 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14659 void
14660 Inst_VOPC__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
14661 {
14662 Wavefront *wf = gpuDynInst->wavefront();
14663 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14664 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14665 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14666
14667 src0.readSrc();
14668 src1.read();
14669
14670 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14671 if (wf->execMask(lane)) {
14672 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14673 }
14674 }
14675
14676 vcc.write();
14677 }
14678
14679 Inst_VOPC__V_CMP_T_I64::Inst_VOPC__V_CMP_T_I64(InFmt_VOPC *iFmt)
14680 : Inst_VOPC(iFmt, "v_cmp_t_i64")
14681 {
14682 setFlag(ALU);
14683 } // Inst_VOPC__V_CMP_T_I64
14684
14685 Inst_VOPC__V_CMP_T_I64::~Inst_VOPC__V_CMP_T_I64()
14686 {
14687 } // ~Inst_VOPC__V_CMP_T_I64
14688
14689 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
14690 void
14691 Inst_VOPC__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
14692 {
14693 Wavefront *wf = gpuDynInst->wavefront();
14694 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14695
14696 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14697 if (wf->execMask(lane)) {
14698 vcc.setBit(lane, 1);
14699 }
14700 }
14701
14702 vcc.write();
14703 }
14704
14705 Inst_VOPC__V_CMP_F_U64::Inst_VOPC__V_CMP_F_U64(InFmt_VOPC *iFmt)
14706 : Inst_VOPC(iFmt, "v_cmp_f_u64")
14707 {
14708 setFlag(ALU);
14709 } // Inst_VOPC__V_CMP_F_U64
14710
14711 Inst_VOPC__V_CMP_F_U64::~Inst_VOPC__V_CMP_F_U64()
14712 {
14713 } // ~Inst_VOPC__V_CMP_F_U64
14714
14715 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
14716 void
14717 Inst_VOPC__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
14718 {
14719 Wavefront *wf = gpuDynInst->wavefront();
14720 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14721
14722 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14723 if (wf->execMask(lane)) {
14724 vcc.setBit(lane, 0);
14725 }
14726 }
14727
14728 vcc.write();
14729 }
14730
14731 Inst_VOPC__V_CMP_LT_U64::Inst_VOPC__V_CMP_LT_U64(InFmt_VOPC *iFmt)
14732 : Inst_VOPC(iFmt, "v_cmp_lt_u64")
14733 {
14734 setFlag(ALU);
14735 } // Inst_VOPC__V_CMP_LT_U64
14736
14737 Inst_VOPC__V_CMP_LT_U64::~Inst_VOPC__V_CMP_LT_U64()
14738 {
14739 } // ~Inst_VOPC__V_CMP_LT_U64
14740
14741 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14742 void
14743 Inst_VOPC__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
14744 {
14745 Wavefront *wf = gpuDynInst->wavefront();
14746 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14747 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14748 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14749
14750 src0.readSrc();
14751 src1.read();
14752
14753 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14754 if (wf->execMask(lane)) {
14755 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14756 }
14757 }
14758
14759 vcc.write();
14760 }
14761
14762 Inst_VOPC__V_CMP_EQ_U64::Inst_VOPC__V_CMP_EQ_U64(InFmt_VOPC *iFmt)
14763 : Inst_VOPC(iFmt, "v_cmp_eq_u64")
14764 {
14765 setFlag(ALU);
14766 } // Inst_VOPC__V_CMP_EQ_U64
14767
14768 Inst_VOPC__V_CMP_EQ_U64::~Inst_VOPC__V_CMP_EQ_U64()
14769 {
14770 } // ~Inst_VOPC__V_CMP_EQ_U64
14771
14772 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
14773 void
14774 Inst_VOPC__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
14775 {
14776 Wavefront *wf = gpuDynInst->wavefront();
14777 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14778 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14779 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14780
14781 src0.readSrc();
14782 src1.read();
14783
14784 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14785 if (wf->execMask(lane)) {
14786 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
14787 }
14788 }
14789
14790 vcc.write();
14791 }
14792
14793 Inst_VOPC__V_CMP_LE_U64::Inst_VOPC__V_CMP_LE_U64(InFmt_VOPC *iFmt)
14794 : Inst_VOPC(iFmt, "v_cmp_le_u64")
14795 {
14796 setFlag(ALU);
14797 } // Inst_VOPC__V_CMP_LE_U64
14798
14799 Inst_VOPC__V_CMP_LE_U64::~Inst_VOPC__V_CMP_LE_U64()
14800 {
14801 } // ~Inst_VOPC__V_CMP_LE_U64
14802
14803 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
14804 void
14805 Inst_VOPC__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
14806 {
14807 Wavefront *wf = gpuDynInst->wavefront();
14808 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14809 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14810 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14811
14812 src0.readSrc();
14813 src1.read();
14814
14815 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14816 if (wf->execMask(lane)) {
14817 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
14818 }
14819 }
14820
14821 vcc.write();
14822 }
14823
14824 Inst_VOPC__V_CMP_GT_U64::Inst_VOPC__V_CMP_GT_U64(InFmt_VOPC *iFmt)
14825 : Inst_VOPC(iFmt, "v_cmp_gt_u64")
14826 {
14827 setFlag(ALU);
14828 } // Inst_VOPC__V_CMP_GT_U64
14829
14830 Inst_VOPC__V_CMP_GT_U64::~Inst_VOPC__V_CMP_GT_U64()
14831 {
14832 } // ~Inst_VOPC__V_CMP_GT_U64
14833
14834 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
14835 void
14836 Inst_VOPC__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
14837 {
14838 Wavefront *wf = gpuDynInst->wavefront();
14839 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14840 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14841 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14842
14843 src0.readSrc();
14844 src1.read();
14845
14846 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14847 if (wf->execMask(lane)) {
14848 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
14849 }
14850 }
14851
14852 vcc.write();
14853 }
14854
14855 Inst_VOPC__V_CMP_NE_U64::Inst_VOPC__V_CMP_NE_U64(InFmt_VOPC *iFmt)
14856 : Inst_VOPC(iFmt, "v_cmp_ne_u64")
14857 {
14858 setFlag(ALU);
14859 } // Inst_VOPC__V_CMP_NE_U64
14860
14861 Inst_VOPC__V_CMP_NE_U64::~Inst_VOPC__V_CMP_NE_U64()
14862 {
14863 } // ~Inst_VOPC__V_CMP_NE_U64
14864
14865         // D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
14866 void
14867 Inst_VOPC__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
14868 {
14869 Wavefront *wf = gpuDynInst->wavefront();
14870 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14871 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14872 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14873
14874 src0.readSrc();
14875 src1.read();
14876
14877 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14878 if (wf->execMask(lane)) {
14879 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
14880 }
14881 }
14882
14883 vcc.write();
14884 }
14885
14886 Inst_VOPC__V_CMP_GE_U64::Inst_VOPC__V_CMP_GE_U64(InFmt_VOPC *iFmt)
14887 : Inst_VOPC(iFmt, "v_cmp_ge_u64")
14888 {
14889 setFlag(ALU);
14890 } // Inst_VOPC__V_CMP_GE_U64
14891
14892 Inst_VOPC__V_CMP_GE_U64::~Inst_VOPC__V_CMP_GE_U64()
14893 {
14894 } // ~Inst_VOPC__V_CMP_GE_U64
14895
14896 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
14897 void
14898 Inst_VOPC__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
14899 {
14900 Wavefront *wf = gpuDynInst->wavefront();
14901 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
14902 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
14903 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14904
14905 src0.readSrc();
14906 src1.read();
14907
14908 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14909 if (wf->execMask(lane)) {
14910 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
14911 }
14912 }
14913
14914 vcc.write();
14915 }
14916
14917 Inst_VOPC__V_CMP_T_U64::Inst_VOPC__V_CMP_T_U64(InFmt_VOPC *iFmt)
14918 : Inst_VOPC(iFmt, "v_cmp_t_u64")
14919 {
14920 setFlag(ALU);
14921 } // Inst_VOPC__V_CMP_T_U64
14922
14923 Inst_VOPC__V_CMP_T_U64::~Inst_VOPC__V_CMP_T_U64()
14924 {
14925 } // ~Inst_VOPC__V_CMP_T_U64
14926
14927 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
14928 void
14929 Inst_VOPC__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
14930 {
14931 Wavefront *wf = gpuDynInst->wavefront();
14932 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14933
14934 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14935 if (wf->execMask(lane)) {
14936 vcc.setBit(lane, 1);
14937 }
14938 }
14939
14940 vcc.write();
14941 }
14942
14943 Inst_VOPC__V_CMPX_F_I64::Inst_VOPC__V_CMPX_F_I64(InFmt_VOPC *iFmt)
14944 : Inst_VOPC(iFmt, "v_cmpx_f_i64")
14945 {
14946 setFlag(ALU);
14947 } // Inst_VOPC__V_CMPX_F_I64
14948
14949 Inst_VOPC__V_CMPX_F_I64::~Inst_VOPC__V_CMPX_F_I64()
14950 {
14951 } // ~Inst_VOPC__V_CMPX_F_I64
14952
14953 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
14954 void
14955 Inst_VOPC__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
14956 {
14957 Wavefront *wf = gpuDynInst->wavefront();
14958 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14959
14960 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14961 if (wf->execMask(lane)) {
14962 vcc.setBit(lane, 0);
14963 }
14964 }
14965
14966 wf->execMask() = vcc.rawData();
14967 vcc.write();
14968 }
14969
14970 Inst_VOPC__V_CMPX_LT_I64::Inst_VOPC__V_CMPX_LT_I64(InFmt_VOPC *iFmt)
14971 : Inst_VOPC(iFmt, "v_cmpx_lt_i64")
14972 {
14973 setFlag(ALU);
14974 } // Inst_VOPC__V_CMPX_LT_I64
14975
14976 Inst_VOPC__V_CMPX_LT_I64::~Inst_VOPC__V_CMPX_LT_I64()
14977 {
14978 } // ~Inst_VOPC__V_CMPX_LT_I64
14979
14980 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
14981 void
14982 Inst_VOPC__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
14983 {
14984 Wavefront *wf = gpuDynInst->wavefront();
14985 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
14986 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
14987 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
14988
14989 src0.readSrc();
14990 src1.read();
14991
14992 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
14993 if (wf->execMask(lane)) {
14994 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
14995 }
14996 }
14997
14998 wf->execMask() = vcc.rawData();
14999 vcc.write();
15000 }
15001
15002 Inst_VOPC__V_CMPX_EQ_I64::Inst_VOPC__V_CMPX_EQ_I64(InFmt_VOPC *iFmt)
15003 : Inst_VOPC(iFmt, "v_cmpx_eq_i64")
15004 {
15005 setFlag(ALU);
15006 } // Inst_VOPC__V_CMPX_EQ_I64
15007
15008 Inst_VOPC__V_CMPX_EQ_I64::~Inst_VOPC__V_CMPX_EQ_I64()
15009 {
15010 } // ~Inst_VOPC__V_CMPX_EQ_I64
15011
15012 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
15013 void
15014 Inst_VOPC__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
15015 {
15016 Wavefront *wf = gpuDynInst->wavefront();
15017 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15018 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15019 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15020
15021 src0.readSrc();
15022 src1.read();
15023
15024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15025 if (wf->execMask(lane)) {
15026 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
15027 }
15028 }
15029
15030 wf->execMask() = vcc.rawData();
15031 vcc.write();
15032 }
15033
15034 Inst_VOPC__V_CMPX_LE_I64::Inst_VOPC__V_CMPX_LE_I64(InFmt_VOPC *iFmt)
15035 : Inst_VOPC(iFmt, "v_cmpx_le_i64")
15036 {
15037 setFlag(ALU);
15038 } // Inst_VOPC__V_CMPX_LE_I64
15039
15040 Inst_VOPC__V_CMPX_LE_I64::~Inst_VOPC__V_CMPX_LE_I64()
15041 {
15042 } // ~Inst_VOPC__V_CMPX_LE_I64
15043
15044 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
15045 void
15046 Inst_VOPC__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
15047 {
15048 Wavefront *wf = gpuDynInst->wavefront();
15049 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15050 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15051 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15052
15053 src0.readSrc();
15054 src1.read();
15055
15056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15057 if (wf->execMask(lane)) {
15058 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
15059 }
15060 }
15061
15062 wf->execMask() = vcc.rawData();
15063 vcc.write();
15064 }
15065
15066 Inst_VOPC__V_CMPX_GT_I64::Inst_VOPC__V_CMPX_GT_I64(InFmt_VOPC *iFmt)
15067 : Inst_VOPC(iFmt, "v_cmpx_gt_i64")
15068 {
15069 setFlag(ALU);
15070 } // Inst_VOPC__V_CMPX_GT_I64
15071
15072 Inst_VOPC__V_CMPX_GT_I64::~Inst_VOPC__V_CMPX_GT_I64()
15073 {
15074 } // ~Inst_VOPC__V_CMPX_GT_I64
15075
15076 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
15077 void
15078 Inst_VOPC__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
15079 {
15080 Wavefront *wf = gpuDynInst->wavefront();
15081 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15082 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15083 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15084
15085 src0.readSrc();
15086 src1.read();
15087
15088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15089 if (wf->execMask(lane)) {
15090 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
15091 }
15092 }
15093
15094 wf->execMask() = vcc.rawData();
15095 vcc.write();
15096 }
15097
15098 Inst_VOPC__V_CMPX_NE_I64::Inst_VOPC__V_CMPX_NE_I64(InFmt_VOPC *iFmt)
15099 : Inst_VOPC(iFmt, "v_cmpx_ne_i64")
15100 {
15101 setFlag(ALU);
15102 } // Inst_VOPC__V_CMPX_NE_I64
15103
15104 Inst_VOPC__V_CMPX_NE_I64::~Inst_VOPC__V_CMPX_NE_I64()
15105 {
15106 } // ~Inst_VOPC__V_CMPX_NE_I64
15107
15108         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
15109 void
15110 Inst_VOPC__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
15111 {
15112 Wavefront *wf = gpuDynInst->wavefront();
15113 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15114 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15115 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15116
15117 src0.readSrc();
15118 src1.read();
15119
15120 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15121 if (wf->execMask(lane)) {
15122 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
15123 }
15124 }
15125
15126 wf->execMask() = vcc.rawData();
15127 vcc.write();
15128 }
15129
15130 Inst_VOPC__V_CMPX_GE_I64::Inst_VOPC__V_CMPX_GE_I64(InFmt_VOPC *iFmt)
15131 : Inst_VOPC(iFmt, "v_cmpx_ge_i64")
15132 {
15133 setFlag(ALU);
15134 } // Inst_VOPC__V_CMPX_GE_I64
15135
15136 Inst_VOPC__V_CMPX_GE_I64::~Inst_VOPC__V_CMPX_GE_I64()
15137 {
15138 } // ~Inst_VOPC__V_CMPX_GE_I64
15139
15140 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
15141 void
15142 Inst_VOPC__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
15143 {
15144 Wavefront *wf = gpuDynInst->wavefront();
15145 ConstVecOperandI64 src0(gpuDynInst, instData.SRC0);
15146 ConstVecOperandI64 src1(gpuDynInst, instData.VSRC1);
15147 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15148
15149 src0.readSrc();
15150 src1.read();
15151
15152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15153 if (wf->execMask(lane)) {
15154 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
15155 }
15156 }
15157
15158 wf->execMask() = vcc.rawData();
15159 vcc.write();
15160 }
15161
15162 Inst_VOPC__V_CMPX_T_I64::Inst_VOPC__V_CMPX_T_I64(InFmt_VOPC *iFmt)
15163 : Inst_VOPC(iFmt, "v_cmpx_t_i64")
15164 {
15165 setFlag(ALU);
15166 } // Inst_VOPC__V_CMPX_T_I64
15167
15168 Inst_VOPC__V_CMPX_T_I64::~Inst_VOPC__V_CMPX_T_I64()
15169 {
15170 } // ~Inst_VOPC__V_CMPX_T_I64
15171
15172 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
15173 void
15174 Inst_VOPC__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
15175 {
15176 Wavefront *wf = gpuDynInst->wavefront();
15177 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15178
15179 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15180 if (wf->execMask(lane)) {
15181 vcc.setBit(lane, 1);
15182 }
15183 }
15184
15185 wf->execMask() = vcc.rawData();
15186 vcc.write();
15187 }
15188
15189 Inst_VOPC__V_CMPX_F_U64::Inst_VOPC__V_CMPX_F_U64(InFmt_VOPC *iFmt)
15190 : Inst_VOPC(iFmt, "v_cmpx_f_u64")
15191 {
15192 setFlag(ALU);
15193 } // Inst_VOPC__V_CMPX_F_U64
15194
15195 Inst_VOPC__V_CMPX_F_U64::~Inst_VOPC__V_CMPX_F_U64()
15196 {
15197 } // ~Inst_VOPC__V_CMPX_F_U64
15198
15199 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
15200 void
15201 Inst_VOPC__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
15202 {
15203 Wavefront *wf = gpuDynInst->wavefront();
15204 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15205
15206 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15207 if (wf->execMask(lane)) {
15208 vcc.setBit(lane, 0);
15209 }
15210 }
15211
15212 wf->execMask() = vcc.rawData();
15213 vcc.write();
15214 }
15215
15216 Inst_VOPC__V_CMPX_LT_U64::Inst_VOPC__V_CMPX_LT_U64(InFmt_VOPC *iFmt)
15217 : Inst_VOPC(iFmt, "v_cmpx_lt_u64")
15218 {
15219 setFlag(ALU);
15220 } // Inst_VOPC__V_CMPX_LT_U64
15221
15222 Inst_VOPC__V_CMPX_LT_U64::~Inst_VOPC__V_CMPX_LT_U64()
15223 {
15224 } // ~Inst_VOPC__V_CMPX_LT_U64
15225
15226 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
15227 void
15228 Inst_VOPC__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
15229 {
15230 Wavefront *wf = gpuDynInst->wavefront();
15231 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15232 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15233 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15234
15235 src0.readSrc();
15236 src1.read();
15237
15238 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15239 if (wf->execMask(lane)) {
15240 vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
15241 }
15242 }
15243
15244 wf->execMask() = vcc.rawData();
15245 vcc.write();
15246 }
15247
15248 Inst_VOPC__V_CMPX_EQ_U64::Inst_VOPC__V_CMPX_EQ_U64(InFmt_VOPC *iFmt)
15249 : Inst_VOPC(iFmt, "v_cmpx_eq_u64")
15250 {
15251 setFlag(ALU);
15252 } // Inst_VOPC__V_CMPX_EQ_U64
15253
15254 Inst_VOPC__V_CMPX_EQ_U64::~Inst_VOPC__V_CMPX_EQ_U64()
15255 {
15256 } // ~Inst_VOPC__V_CMPX_EQ_U64
15257
15258 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
15259 void
15260 Inst_VOPC__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
15261 {
15262 Wavefront *wf = gpuDynInst->wavefront();
15263 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15264 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15265 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15266
15267 src0.readSrc();
15268 src1.read();
15269
15270 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15271 if (wf->execMask(lane)) {
15272 vcc.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
15273 }
15274 }
15275
15276 wf->execMask() = vcc.rawData();
15277 vcc.write();
15278 }
15279
15280 Inst_VOPC__V_CMPX_LE_U64::Inst_VOPC__V_CMPX_LE_U64(InFmt_VOPC *iFmt)
15281 : Inst_VOPC(iFmt, "v_cmpx_le_u64")
15282 {
15283 setFlag(ALU);
15284 } // Inst_VOPC__V_CMPX_LE_U64
15285
15286 Inst_VOPC__V_CMPX_LE_U64::~Inst_VOPC__V_CMPX_LE_U64()
15287 {
15288 } // ~Inst_VOPC__V_CMPX_LE_U64
15289
15290 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
15291 void
15292 Inst_VOPC__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
15293 {
15294 Wavefront *wf = gpuDynInst->wavefront();
15295 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15296 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15297 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15298
15299 src0.readSrc();
15300 src1.read();
15301
15302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15303 if (wf->execMask(lane)) {
15304 vcc.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
15305 }
15306 }
15307
15308 wf->execMask() = vcc.rawData();
15309 vcc.write();
15310 }
15311
15312 Inst_VOPC__V_CMPX_GT_U64::Inst_VOPC__V_CMPX_GT_U64(InFmt_VOPC *iFmt)
15313 : Inst_VOPC(iFmt, "v_cmpx_gt_u64")
15314 {
15315 setFlag(ALU);
15316 } // Inst_VOPC__V_CMPX_GT_U64
15317
15318 Inst_VOPC__V_CMPX_GT_U64::~Inst_VOPC__V_CMPX_GT_U64()
15319 {
15320 } // ~Inst_VOPC__V_CMPX_GT_U64
15321
15322 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
15323 void
15324 Inst_VOPC__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
15325 {
15326 Wavefront *wf = gpuDynInst->wavefront();
15327 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15328 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15329 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15330
15331 src0.readSrc();
15332 src1.read();
15333
15334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15335 if (wf->execMask(lane)) {
15336 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
15337 }
15338 }
15339
15340 wf->execMask() = vcc.rawData();
15341 vcc.write();
15342 }
15343
15344 Inst_VOPC__V_CMPX_NE_U64::Inst_VOPC__V_CMPX_NE_U64(InFmt_VOPC *iFmt)
15345 : Inst_VOPC(iFmt, "v_cmpx_ne_u64")
15346 {
15347 setFlag(ALU);
15348 } // Inst_VOPC__V_CMPX_NE_U64
15349
15350 Inst_VOPC__V_CMPX_NE_U64::~Inst_VOPC__V_CMPX_NE_U64()
15351 {
15352 } // ~Inst_VOPC__V_CMPX_NE_U64
15353
15354         // EXEC,D.u64[threadID] = (S0 != S1); D = VCC in VOPC encoding.
15355 void
15356 Inst_VOPC__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
15357 {
15358 Wavefront *wf = gpuDynInst->wavefront();
15359 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15360 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15361 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15362
15363 src0.readSrc();
15364 src1.read();
15365
15366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15367 if (wf->execMask(lane)) {
15368 vcc.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
15369 }
15370 }
15371
15372 wf->execMask() = vcc.rawData();
15373 vcc.write();
15374 }
15375
15376 Inst_VOPC__V_CMPX_GE_U64::Inst_VOPC__V_CMPX_GE_U64(InFmt_VOPC *iFmt)
15377 : Inst_VOPC(iFmt, "v_cmpx_ge_u64")
15378 {
15379 setFlag(ALU);
15380 } // Inst_VOPC__V_CMPX_GE_U64
15381
15382 Inst_VOPC__V_CMPX_GE_U64::~Inst_VOPC__V_CMPX_GE_U64()
15383 {
15384 } // ~Inst_VOPC__V_CMPX_GE_U64
15385
15386 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
15387 void
15388 Inst_VOPC__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
15389 {
15390 Wavefront *wf = gpuDynInst->wavefront();
15391 ConstVecOperandU64 src0(gpuDynInst, instData.SRC0);
15392 ConstVecOperandU64 src1(gpuDynInst, instData.VSRC1);
15393 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15394
15395 src0.readSrc();
15396 src1.read();
15397
15398 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15399 if (wf->execMask(lane)) {
15400 vcc.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
15401 }
15402 }
15403
15404 wf->execMask() = vcc.rawData();
15405 vcc.write();
15406 }
15407
15408 Inst_VOPC__V_CMPX_T_U64::Inst_VOPC__V_CMPX_T_U64(InFmt_VOPC *iFmt)
15409 : Inst_VOPC(iFmt, "v_cmpx_t_u64")
15410 {
15411 setFlag(ALU);
15412 } // Inst_VOPC__V_CMPX_T_U64
15413
15414 Inst_VOPC__V_CMPX_T_U64::~Inst_VOPC__V_CMPX_T_U64()
15415 {
15416 } // ~Inst_VOPC__V_CMPX_T_U64
15417
15418 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
15419 void
15420 Inst_VOPC__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
15421 {
15422 Wavefront *wf = gpuDynInst->wavefront();
15423 ScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
15424
15425 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15426 if (wf->execMask(lane)) {
15427 vcc.setBit(lane, 1);
15428 }
15429 }
15430
15431 wf->execMask() = vcc.rawData();
15432 vcc.write();
15433 }
15434
15435 Inst_VINTRP__V_INTERP_P1_F32::Inst_VINTRP__V_INTERP_P1_F32(
15436 InFmt_VINTRP *iFmt)
15437 : Inst_VINTRP(iFmt, "v_interp_p1_f32")
15438 {
15439 setFlag(ALU);
15440 setFlag(F32);
15441 } // Inst_VINTRP__V_INTERP_P1_F32
15442
15443 Inst_VINTRP__V_INTERP_P1_F32::~Inst_VINTRP__V_INTERP_P1_F32()
15444 {
15445 } // ~Inst_VINTRP__V_INTERP_P1_F32
15446
15447 // D.f = P10 * S.f + P0; parameter interpolation
15448 void
15449 Inst_VINTRP__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
15450 {
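            // parameter interpolation is a graphics feature and is left
            // unimplemented in this model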
15451 panicUnimplemented();
15452 }
15453
15454 Inst_VINTRP__V_INTERP_P2_F32::Inst_VINTRP__V_INTERP_P2_F32(
15455 InFmt_VINTRP *iFmt)
15456 : Inst_VINTRP(iFmt, "v_interp_p2_f32")
15457 {
15458 setFlag(ALU);
15459 setFlag(F32);
15460 } // Inst_VINTRP__V_INTERP_P2_F32
15461
15462 Inst_VINTRP__V_INTERP_P2_F32::~Inst_VINTRP__V_INTERP_P2_F32()
15463 {
15464 } // ~Inst_VINTRP__V_INTERP_P2_F32
15465
15466 // D.f = P20 * S.f + D.f; parameter interpolation
15467 void
15468 Inst_VINTRP__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
15469 {
15470 panicUnimplemented();
15471 }
15472
15473 Inst_VINTRP__V_INTERP_MOV_F32::Inst_VINTRP__V_INTERP_MOV_F32(
15474 InFmt_VINTRP *iFmt)
15475 : Inst_VINTRP(iFmt, "v_interp_mov_f32")
15476 {
15477 setFlag(ALU);
15478 setFlag(F32);
15479 } // Inst_VINTRP__V_INTERP_MOV_F32
15480
15481 Inst_VINTRP__V_INTERP_MOV_F32::~Inst_VINTRP__V_INTERP_MOV_F32()
15482 {
15483 } // ~Inst_VINTRP__V_INTERP_MOV_F32
15484
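        // D.f = {P10, P20, P0}[S.u]; parameter load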
15485 void
15486 Inst_VINTRP__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
15487 {
15488 panicUnimplemented();
15489 }
15490
15491 Inst_VOP3__V_CMP_CLASS_F32::Inst_VOP3__V_CMP_CLASS_F32(
15492 InFmt_VOP3 *iFmt)
15493 : Inst_VOP3(iFmt, "v_cmp_class_f32", true)
15494 {
15495 setFlag(ALU);
15496 setFlag(F32);
15497 } // Inst_VOP3__V_CMP_CLASS_F32
15498
15499 Inst_VOP3__V_CMP_CLASS_F32::~Inst_VOP3__V_CMP_CLASS_F32()
15500 {
15501 } // ~Inst_VOP3__V_CMP_CLASS_F32
15502
15503 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f
15504 // The function reports true if the floating point value is any of the
15505 // numeric types selected in S1.u according to the following list:
15506 // S1.u[0] -- value is a signaling NaN.
15507 // S1.u[1] -- value is a quiet NaN.
15508 // S1.u[2] -- value is negative infinity.
15509 // S1.u[3] -- value is a negative normal value.
15510 // S1.u[4] -- value is a negative denormal value.
15511 // S1.u[5] -- value is negative zero.
15512 // S1.u[6] -- value is positive zero.
15513 // S1.u[7] -- value is a positive denormal value.
15514 // S1.u[8] -- value is a positive normal value.
15515 // S1.u[9] -- value is positive infinity.
15516 void
15517 Inst_VOP3__V_CMP_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
15518 {
15519 Wavefront *wf = gpuDynInst->wavefront();
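            // VOP3-encoded compares read their sources from extData and
            // write the result mask to the SGPR pair named by VDST rather
            // than implicitly targeting VCC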
15520 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
15521 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15522 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15523
15524 src0.readSrc();
15525 src1.readSrc();
15526
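            // the first class that matches sets the lane's result bit and
            // short-circuits the remaining tests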
15527 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15528 if (wf->execMask(lane)) {
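                    // bits 0 (sNaN) and 1 (qNaN) are tested together
                    // because std::isnan() does not distinguish them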
15529 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15530 // is NaN
15531 if (std::isnan(src0[lane])) {
15532 sdst.setBit(lane, 1);
15533 continue;
15534 }
15535 }
15536 if (bits(src1[lane], 2)) {
15537 // is -infinity
15538 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15539 sdst.setBit(lane, 1);
15540 continue;
15541 }
15542 }
15543 if (bits(src1[lane], 3)) {
15544 // is -normal
15545 if (std::isnormal(src0[lane])
15546 && std::signbit(src0[lane])) {
15547 sdst.setBit(lane, 1);
15548 continue;
15549 }
15550 }
15551 if (bits(src1[lane], 4)) {
15552 // is -denormal
15553 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15554 && std::signbit(src0[lane])) {
15555 sdst.setBit(lane, 1);
15556 continue;
15557 }
15558 }
15559 if (bits(src1[lane], 5)) {
15560 // is -zero
15561 if (std::fpclassify(src0[lane]) == FP_ZERO
15562 && std::signbit(src0[lane])) {
15563 sdst.setBit(lane, 1);
15564 continue;
15565 }
15566 }
15567 if (bits(src1[lane], 6)) {
15568 // is +zero
15569 if (std::fpclassify(src0[lane]) == FP_ZERO
15570 && !std::signbit(src0[lane])) {
15571 sdst.setBit(lane, 1);
15572 continue;
15573 }
15574 }
15575 if (bits(src1[lane], 7)) {
15576 // is +denormal
15577 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15578 && !std::signbit(src0[lane])) {
15579 sdst.setBit(lane, 1);
15580 continue;
15581 }
15582 }
15583 if (bits(src1[lane], 8)) {
15584 // is +normal
15585 if (std::isnormal(src0[lane])
15586 && !std::signbit(src0[lane])) {
15587 sdst.setBit(lane, 1);
15588 continue;
15589 }
15590 }
15591 if (bits(src1[lane], 9)) {
15592 // is +infinity
15593 if (std::isinf(src0[lane])
15594 && !std::signbit(src0[lane])) {
15595 sdst.setBit(lane, 1);
15596 continue;
15597 }
15598 }
15599 }
15600 }
15601
15602 sdst.write();
15603 }
15604
15605 Inst_VOP3__V_CMPX_CLASS_F32::Inst_VOP3__V_CMPX_CLASS_F32(
15606 InFmt_VOP3 *iFmt)
15607 : Inst_VOP3(iFmt, "v_cmpx_class_f32", true)
15608 {
15609 setFlag(ALU);
15610 setFlag(F32);
15611 } // Inst_VOP3__V_CMPX_CLASS_F32
15612
15613 Inst_VOP3__V_CMPX_CLASS_F32::~Inst_VOP3__V_CMPX_CLASS_F32()
15614 {
15615 } // ~Inst_VOP3__V_CMPX_CLASS_F32
15616
15617 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
15618 // S0.f
15619 // The function reports true if the floating point value is any of the
15620 // numeric types selected in S1.u according to the following list:
15621 // S1.u[0] -- value is a signaling NaN.
15622 // S1.u[1] -- value is a quiet NaN.
15623 // S1.u[2] -- value is negative infinity.
15624 // S1.u[3] -- value is a negative normal value.
15625 // S1.u[4] -- value is a negative denormal value.
15626 // S1.u[5] -- value is negative zero.
15627 // S1.u[6] -- value is positive zero.
15628 // S1.u[7] -- value is a positive denormal value.
15629 // S1.u[8] -- value is a positive normal value.
15630 // S1.u[9] -- value is positive infinity.
15631 void
15632 Inst_VOP3__V_CMPX_CLASS_F32::execute(GPUDynInstPtr gpuDynInst)
15633 {
15634 Wavefront *wf = gpuDynInst->wavefront();
15635 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
15636 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15637 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15638
15639 src0.readSrc();
15640 src1.readSrc();
15641
15642 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15643 if (wf->execMask(lane)) {
15644 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15645 // is NaN
15646 if (std::isnan(src0[lane])) {
15647 sdst.setBit(lane, 1);
15648 continue;
15649 }
15650 }
15651 if (bits(src1[lane], 2)) {
15652 // is -infinity
15653 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15654 sdst.setBit(lane, 1);
15655 continue;
15656 }
15657 }
15658 if (bits(src1[lane], 3)) {
15659 // is -normal
15660 if (std::isnormal(src0[lane])
15661 && std::signbit(src0[lane])) {
15662 sdst.setBit(lane, 1);
15663 continue;
15664 }
15665 }
15666 if (bits(src1[lane], 4)) {
15667 // is -denormal
15668 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15669 && std::signbit(src0[lane])) {
15670 sdst.setBit(lane, 1);
15671 continue;
15672 }
15673 }
15674 if (bits(src1[lane], 5)) {
15675 // is -zero
15676 if (std::fpclassify(src0[lane]) == FP_ZERO
15677 && std::signbit(src0[lane])) {
15678 sdst.setBit(lane, 1);
15679 continue;
15680 }
15681 }
15682 if (bits(src1[lane], 6)) {
15683 // is +zero
15684 if (std::fpclassify(src0[lane]) == FP_ZERO
15685 && !std::signbit(src0[lane])) {
15686 sdst.setBit(lane, 1);
15687 continue;
15688 }
15689 }
15690 if (bits(src1[lane], 7)) {
15691 // is +denormal
15692 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15693 && !std::signbit(src0[lane])) {
15694 sdst.setBit(lane, 1);
15695 continue;
15696 }
15697 }
15698 if (bits(src1[lane], 8)) {
15699 // is +normal
15700 if (std::isnormal(src0[lane])
15701 && !std::signbit(src0[lane])) {
15702 sdst.setBit(lane, 1);
15703 continue;
15704 }
15705 }
15706 if (bits(src1[lane], 9)) {
15707 // is +infinity
15708 if (std::isinf(src0[lane])
15709 && !std::signbit(src0[lane])) {
15710 sdst.setBit(lane, 1);
15711 continue;
15712 }
15713 }
15714 }
15715 }
15716
15717 wf->execMask() = sdst.rawData();
15718 sdst.write();
15719 }
15720
15721 Inst_VOP3__V_CMP_CLASS_F64::Inst_VOP3__V_CMP_CLASS_F64(
15722 InFmt_VOP3 *iFmt)
15723 : Inst_VOP3(iFmt, "v_cmp_class_f64", true)
15724 {
15725 setFlag(ALU);
15726 setFlag(F64);
15727 } // Inst_VOP3__V_CMP_CLASS_F64
15728
15729 Inst_VOP3__V_CMP_CLASS_F64::~Inst_VOP3__V_CMP_CLASS_F64()
15730 {
15731 } // ~Inst_VOP3__V_CMP_CLASS_F64
15732
15733 // VCC = IEEE numeric class function specified in S1.u, performed on S0.d
15734 // The function reports true if the floating point value is any of the
15735 // numeric types selected in S1.u according to the following list:
15736 // S1.u[0] -- value is a signaling NaN.
15737 // S1.u[1] -- value is a quiet NaN.
15738 // S1.u[2] -- value is negative infinity.
15739 // S1.u[3] -- value is a negative normal value.
15740 // S1.u[4] -- value is a negative denormal value.
15741 // S1.u[5] -- value is negative zero.
15742 // S1.u[6] -- value is positive zero.
15743 // S1.u[7] -- value is a positive denormal value.
15744 // S1.u[8] -- value is a positive normal value.
15745 // S1.u[9] -- value is positive infinity.
15746 void
15747 Inst_VOP3__V_CMP_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
15748 {
15749 Wavefront *wf = gpuDynInst->wavefront();
15750 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
15751 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15752 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15753
15754 src0.readSrc();
15755 src1.readSrc();
15756
15757 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15758 if (wf->execMask(lane)) {
15759 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15760 // is NaN
15761 if (std::isnan(src0[lane])) {
15762 sdst.setBit(lane, 1);
15763 continue;
15764 }
15765 }
15766 if (bits(src1[lane], 2)) {
15767 // is -infinity
15768 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15769 sdst.setBit(lane, 1);
15770 continue;
15771 }
15772 }
15773 if (bits(src1[lane], 3)) {
15774 // is -normal
15775 if (std::isnormal(src0[lane])
15776 && std::signbit(src0[lane])) {
15777 sdst.setBit(lane, 1);
15778 continue;
15779 }
15780 }
15781 if (bits(src1[lane], 4)) {
15782 // is -denormal
15783 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15784 && std::signbit(src0[lane])) {
15785 sdst.setBit(lane, 1);
15786 continue;
15787 }
15788 }
15789 if (bits(src1[lane], 5)) {
15790 // is -zero
15791 if (std::fpclassify(src0[lane]) == FP_ZERO
15792 && std::signbit(src0[lane])) {
15793 sdst.setBit(lane, 1);
15794 continue;
15795 }
15796 }
15797 if (bits(src1[lane], 6)) {
15798 // is +zero
15799 if (std::fpclassify(src0[lane]) == FP_ZERO
15800 && !std::signbit(src0[lane])) {
15801 sdst.setBit(lane, 1);
15802 continue;
15803 }
15804 }
15805 if (bits(src1[lane], 7)) {
15806 // is +denormal
15807 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15808 && !std::signbit(src0[lane])) {
15809 sdst.setBit(lane, 1);
15810 continue;
15811 }
15812 }
15813 if (bits(src1[lane], 8)) {
15814 // is +normal
15815 if (std::isnormal(src0[lane])
15816 && !std::signbit(src0[lane])) {
15817 sdst.setBit(lane, 1);
15818 continue;
15819 }
15820 }
15821 if (bits(src1[lane], 9)) {
15822 // is +infinity
15823 if (std::isinf(src0[lane])
15824 && !std::signbit(src0[lane])) {
15825 sdst.setBit(lane, 1);
15826 continue;
15827 }
15828 }
15829 }
15830 }
15831
15832 sdst.write();
15833 }
15834
15835 Inst_VOP3__V_CMPX_CLASS_F64::Inst_VOP3__V_CMPX_CLASS_F64(
15836 InFmt_VOP3 *iFmt)
15837 : Inst_VOP3(iFmt, "v_cmpx_class_f64", true)
15838 {
15839 setFlag(ALU);
15840 setFlag(F64);
15841 } // Inst_VOP3__V_CMPX_CLASS_F64
15842
15843 Inst_VOP3__V_CMPX_CLASS_F64::~Inst_VOP3__V_CMPX_CLASS_F64()
15844 {
15845 } // ~Inst_VOP3__V_CMPX_CLASS_F64
15846
15847 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
15848 // S0.d
15849 // The function reports true if the floating point value is any of the
15850 // numeric types selected in S1.u according to the following list:
15851 // S1.u[0] -- value is a signaling NaN.
15852 // S1.u[1] -- value is a quiet NaN.
15853 // S1.u[2] -- value is negative infinity.
15854 // S1.u[3] -- value is a negative normal value.
15855 // S1.u[4] -- value is a negative denormal value.
15856 // S1.u[5] -- value is negative zero.
15857 // S1.u[6] -- value is positive zero.
15858 // S1.u[7] -- value is a positive denormal value.
15859 // S1.u[8] -- value is a positive normal value.
15860 // S1.u[9] -- value is positive infinity.
15861 void
15862 Inst_VOP3__V_CMPX_CLASS_F64::execute(GPUDynInstPtr gpuDynInst)
15863 {
15864 Wavefront *wf = gpuDynInst->wavefront();
15865 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
15866 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
15867 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
15868
15869 src0.readSrc();
15870 src1.readSrc();
15871
15872 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
15873 if (wf->execMask(lane)) {
15874 if (bits(src1[lane], 0) || bits(src1[lane], 1)) {
15875 // is NaN
15876 if (std::isnan(src0[lane])) {
15877 sdst.setBit(lane, 1);
15878 continue;
15879 }
15880 }
15881 if (bits(src1[lane], 2)) {
15882 // is -infinity
15883 if (std::isinf(src0[lane]) && std::signbit(src0[lane])) {
15884 sdst.setBit(lane, 1);
15885 continue;
15886 }
15887 }
15888 if (bits(src1[lane], 3)) {
15889 // is -normal
15890 if (std::isnormal(src0[lane])
15891 && std::signbit(src0[lane])) {
15892 sdst.setBit(lane, 1);
15893 continue;
15894 }
15895 }
15896 if (bits(src1[lane], 4)) {
15897 // is -denormal
15898 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15899 && std::signbit(src0[lane])) {
15900 sdst.setBit(lane, 1);
15901 continue;
15902 }
15903 }
15904 if (bits(src1[lane], 5)) {
15905 // is -zero
15906 if (std::fpclassify(src0[lane]) == FP_ZERO
15907 && std::signbit(src0[lane])) {
15908 sdst.setBit(lane, 1);
15909 continue;
15910 }
15911 }
15912 if (bits(src1[lane], 6)) {
15913 // is +zero
15914 if (std::fpclassify(src0[lane]) == FP_ZERO
15915 && !std::signbit(src0[lane])) {
15916 sdst.setBit(lane, 1);
15917 continue;
15918 }
15919 }
15920 if (bits(src1[lane], 7)) {
15921 // is +denormal
15922 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
15923 && !std::signbit(src0[lane])) {
15924 sdst.setBit(lane, 1);
15925 continue;
15926 }
15927 }
15928 if (bits(src1[lane], 8)) {
15929 // is +normal
15930 if (std::isnormal(src0[lane])
15931 && !std::signbit(src0[lane])) {
15932 sdst.setBit(lane, 1);
15933 continue;
15934 }
15935 }
15936 if (bits(src1[lane], 9)) {
15937 // is +infinity
15938 if (std::isinf(src0[lane])
15939 && !std::signbit(src0[lane])) {
15940 sdst.setBit(lane, 1);
15941 continue;
15942 }
15943 }
15944 }
15945 }
15946
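// CMPX variants additionally copy the result mask into EXEC, so
// subsequent instructions run only on lanes that passed the compare.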
15947 wf->execMask() = sdst.rawData();
15948 sdst.write();
15949 }
15950
15951 Inst_VOP3__V_CMP_CLASS_F16::Inst_VOP3__V_CMP_CLASS_F16(
15952 InFmt_VOP3 *iFmt)
15953 : Inst_VOP3(iFmt, "v_cmp_class_f16", true)
15954 {
15955 setFlag(ALU);
15956 setFlag(F16);
15957 } // Inst_VOP3__V_CMP_CLASS_F16
15958
15959 Inst_VOP3__V_CMP_CLASS_F16::~Inst_VOP3__V_CMP_CLASS_F16()
15960 {
15961 } // ~Inst_VOP3__V_CMP_CLASS_F16
15962
15963 // VCC = IEEE numeric class function specified in S1.u, performed on S0.f16
15964 // The function reports true if the floating point value is any of the
15965 // numeric types selected in S1.u according to the following list:
15966 // S1.u[0] -- value is a signaling NaN.
15967 // S1.u[1] -- value is a quiet NaN.
15968 // S1.u[2] -- value is negative infinity.
15969 // S1.u[3] -- value is a negative normal value.
15970 // S1.u[4] -- value is a negative denormal value.
15971 // S1.u[5] -- value is negative zero.
15972 // S1.u[6] -- value is positive zero.
15973 // S1.u[7] -- value is a positive denormal value.
15974 // S1.u[8] -- value is a positive normal value.
15975 // S1.u[9] -- value is positive infinity.
15976 void
15977 Inst_VOP3__V_CMP_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
15978 {
15979 panicUnimplemented();
15980 }
15981
15982 Inst_VOP3__V_CMPX_CLASS_F16::Inst_VOP3__V_CMPX_CLASS_F16(
15983 InFmt_VOP3 *iFmt)
15984 : Inst_VOP3(iFmt, "v_cmpx_class_f16", true)
15985 {
15986 setFlag(ALU);
15987 setFlag(F16);
15988 } // Inst_VOP3__V_CMPX_CLASS_F16
15989
15990 Inst_VOP3__V_CMPX_CLASS_F16::~Inst_VOP3__V_CMPX_CLASS_F16()
15991 {
15992 } // ~Inst_VOP3__V_CMPX_CLASS_F16
15993
15994 // EXEC, VCC = IEEE numeric class function specified in S1.u, performed on
15995 // S0.f16
15996 // The function reports true if the floating point value is any of the
15997 // numeric types selected in S1.u according to the following list:
15998 // S1.u[0] -- value is a signaling NaN.
15999 // S1.u[1] -- value is a quiet NaN.
16000 // S1.u[2] -- value is negative infinity.
16001 // S1.u[3] -- value is a negative normal value.
16002 // S1.u[4] -- value is a negative denormal value.
16003 // S1.u[5] -- value is negative zero.
16004 // S1.u[6] -- value is positive zero.
16005 // S1.u[7] -- value is a positive denormal value.
16006 // S1.u[8] -- value is a positive normal value.
16007 // S1.u[9] -- value is positive infinity.
16008 void
16009 Inst_VOP3__V_CMPX_CLASS_F16::execute(GPUDynInstPtr gpuDynInst)
16010 {
16011 panicUnimplemented();
16012 }
16013
16014 Inst_VOP3__V_CMP_F_F16::Inst_VOP3__V_CMP_F_F16(InFmt_VOP3 *iFmt)
16015 : Inst_VOP3(iFmt, "v_cmp_f_f16", true)
16016 {
16017 setFlag(ALU);
16018 setFlag(F16);
16019 } // Inst_VOP3__V_CMP_F_F16
16020
16021 Inst_VOP3__V_CMP_F_F16::~Inst_VOP3__V_CMP_F_F16()
16022 {
16023 } // ~Inst_VOP3__V_CMP_F_F16
16024
16025 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
16026 void
16027 Inst_VOP3__V_CMP_F_F16::execute(GPUDynInstPtr gpuDynInst)
16028 {
16029 panicUnimplemented();
16030 }
16031
16032 Inst_VOP3__V_CMP_LT_F16::Inst_VOP3__V_CMP_LT_F16(
16033 InFmt_VOP3 *iFmt)
16034 : Inst_VOP3(iFmt, "v_cmp_lt_f16", true)
16035 {
16036 setFlag(ALU);
16037 setFlag(F16);
16038 } // Inst_VOP3__V_CMP_LT_F16
16039
16040 Inst_VOP3__V_CMP_LT_F16::~Inst_VOP3__V_CMP_LT_F16()
16041 {
16042 } // ~Inst_VOP3__V_CMP_LT_F16
16043
16044 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
16045 void
16046 Inst_VOP3__V_CMP_LT_F16::execute(GPUDynInstPtr gpuDynInst)
16047 {
16048 panicUnimplemented();
16049 }
16050
16051 Inst_VOP3__V_CMP_EQ_F16::Inst_VOP3__V_CMP_EQ_F16(
16052 InFmt_VOP3 *iFmt)
16053 : Inst_VOP3(iFmt, "v_cmp_eq_f16", true)
16054 {
16055 setFlag(ALU);
16056 setFlag(F16);
16057 } // Inst_VOP3__V_CMP_EQ_F16
16058
16059 Inst_VOP3__V_CMP_EQ_F16::~Inst_VOP3__V_CMP_EQ_F16()
16060 {
16061 } // ~Inst_VOP3__V_CMP_EQ_F16
16062
16063 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
16064 void
16065 Inst_VOP3__V_CMP_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
16066 {
16067 panicUnimplemented();
16068 }
16069
16070 Inst_VOP3__V_CMP_LE_F16::Inst_VOP3__V_CMP_LE_F16(
16071 InFmt_VOP3 *iFmt)
16072 : Inst_VOP3(iFmt, "v_cmp_le_f16", true)
16073 {
16074 setFlag(ALU);
16075 setFlag(F16);
16076 } // Inst_VOP3__V_CMP_LE_F16
16077
16078 Inst_VOP3__V_CMP_LE_F16::~Inst_VOP3__V_CMP_LE_F16()
16079 {
16080 } // ~Inst_VOP3__V_CMP_LE_F16
16081
16082 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
16083 void
16084 Inst_VOP3__V_CMP_LE_F16::execute(GPUDynInstPtr gpuDynInst)
16085 {
16086 panicUnimplemented();
16087 }
16088
16089 Inst_VOP3__V_CMP_GT_F16::Inst_VOP3__V_CMP_GT_F16(
16090 InFmt_VOP3 *iFmt)
16091 : Inst_VOP3(iFmt, "v_cmp_gt_f16", true)
16092 {
16093 setFlag(ALU);
16094 setFlag(F16);
16095 } // Inst_VOP3__V_CMP_GT_F16
16096
16097 Inst_VOP3__V_CMP_GT_F16::~Inst_VOP3__V_CMP_GT_F16()
16098 {
16099 } // ~Inst_VOP3__V_CMP_GT_F16
16100
16101 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
16102 void
16103 Inst_VOP3__V_CMP_GT_F16::execute(GPUDynInstPtr gpuDynInst)
16104 {
16105 panicUnimplemented();
16106 }
16107
16108 Inst_VOP3__V_CMP_LG_F16::Inst_VOP3__V_CMP_LG_F16(
16109 InFmt_VOP3 *iFmt)
16110 : Inst_VOP3(iFmt, "v_cmp_lg_f16", true)
16111 {
16112 setFlag(ALU);
16113 setFlag(F16);
16114 } // Inst_VOP3__V_CMP_LG_F16
16115
16116 Inst_VOP3__V_CMP_LG_F16::~Inst_VOP3__V_CMP_LG_F16()
16117 {
16118 } // ~Inst_VOP3__V_CMP_LG_F16
16119
16120 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
16121 void
16122 Inst_VOP3__V_CMP_LG_F16::execute(GPUDynInstPtr gpuDynInst)
16123 {
16124 panicUnimplemented();
16125 }
16126
16127 Inst_VOP3__V_CMP_GE_F16::Inst_VOP3__V_CMP_GE_F16(
16128 InFmt_VOP3 *iFmt)
16129 : Inst_VOP3(iFmt, "v_cmp_ge_f16", true)
16130 {
16131 setFlag(ALU);
16132 setFlag(F16);
16133 } // Inst_VOP3__V_CMP_GE_F16
16134
16135 Inst_VOP3__V_CMP_GE_F16::~Inst_VOP3__V_CMP_GE_F16()
16136 {
16137 } // ~Inst_VOP3__V_CMP_GE_F16
16138
16139 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
16140 void
16141 Inst_VOP3__V_CMP_GE_F16::execute(GPUDynInstPtr gpuDynInst)
16142 {
16143 panicUnimplemented();
16144 }
16145
16146 Inst_VOP3__V_CMP_O_F16::Inst_VOP3__V_CMP_O_F16(InFmt_VOP3 *iFmt)
16147 : Inst_VOP3(iFmt, "v_cmp_o_f16", true)
16148 {
16149 setFlag(ALU);
16150 setFlag(F16);
16151 } // Inst_VOP3__V_CMP_O_F16
16152
16153 Inst_VOP3__V_CMP_O_F16::~Inst_VOP3__V_CMP_O_F16()
16154 {
16155 } // ~Inst_VOP3__V_CMP_O_F16
16156
16157 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
16158 void
16159 Inst_VOP3__V_CMP_O_F16::execute(GPUDynInstPtr gpuDynInst)
16160 {
16161 panicUnimplemented();
16162 }
16163
16164 Inst_VOP3__V_CMP_U_F16::Inst_VOP3__V_CMP_U_F16(InFmt_VOP3 *iFmt)
16165 : Inst_VOP3(iFmt, "v_cmp_u_f16", true)
16166 {
16167 setFlag(ALU);
16168 setFlag(F16);
16169 } // Inst_VOP3__V_CMP_U_F16
16170
16171 Inst_VOP3__V_CMP_U_F16::~Inst_VOP3__V_CMP_U_F16()
16172 {
16173 } // ~Inst_VOP3__V_CMP_U_F16
16174
16175 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
16176 void
16177 Inst_VOP3__V_CMP_U_F16::execute(GPUDynInstPtr gpuDynInst)
16178 {
16179 panicUnimplemented();
16180 }
16181
16182 Inst_VOP3__V_CMP_NGE_F16::Inst_VOP3__V_CMP_NGE_F16(
16183 InFmt_VOP3 *iFmt)
16184 : Inst_VOP3(iFmt, "v_cmp_nge_f16", true)
16185 {
16186 setFlag(ALU);
16187 setFlag(F16);
16188 } // Inst_VOP3__V_CMP_NGE_F16
16189
16190 Inst_VOP3__V_CMP_NGE_F16::~Inst_VOP3__V_CMP_NGE_F16()
16191 {
16192 } // ~Inst_VOP3__V_CMP_NGE_F16
16193
16194 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
16195 void
16196 Inst_VOP3__V_CMP_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
16197 {
16198 panicUnimplemented();
16199 }
16200
16201 Inst_VOP3__V_CMP_NLG_F16::Inst_VOP3__V_CMP_NLG_F16(
16202 InFmt_VOP3 *iFmt)
16203 : Inst_VOP3(iFmt, "v_cmp_nlg_f16", true)
16204 {
16205 setFlag(ALU);
16206 setFlag(F16);
16207 } // Inst_VOP3__V_CMP_NLG_F16
16208
16209 Inst_VOP3__V_CMP_NLG_F16::~Inst_VOP3__V_CMP_NLG_F16()
16210 {
16211 } // ~Inst_VOP3__V_CMP_NLG_F16
16212
16213 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
16214 void
16215 Inst_VOP3__V_CMP_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
16216 {
16217 panicUnimplemented();
16218 }
16219
16220 Inst_VOP3__V_CMP_NGT_F16::Inst_VOP3__V_CMP_NGT_F16(
16221 InFmt_VOP3 *iFmt)
16222 : Inst_VOP3(iFmt, "v_cmp_ngt_f16", true)
16223 {
16224 setFlag(ALU);
16225 setFlag(F16);
16226 } // Inst_VOP3__V_CMP_NGT_F16
16227
16228 Inst_VOP3__V_CMP_NGT_F16::~Inst_VOP3__V_CMP_NGT_F16()
16229 {
16230 } // ~Inst_VOP3__V_CMP_NGT_F16
16231
16232 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
16233 void
16234 Inst_VOP3__V_CMP_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
16235 {
16236 panicUnimplemented();
16237 }
16238
16239 Inst_VOP3__V_CMP_NLE_F16::Inst_VOP3__V_CMP_NLE_F16(
16240 InFmt_VOP3 *iFmt)
16241 : Inst_VOP3(iFmt, "v_cmp_nle_f16", true)
16242 {
16243 setFlag(ALU);
16244 setFlag(F16);
16245 } // Inst_VOP3__V_CMP_NLE_F16
16246
16247 Inst_VOP3__V_CMP_NLE_F16::~Inst_VOP3__V_CMP_NLE_F16()
16248 {
16249 } // ~Inst_VOP3__V_CMP_NLE_F16
16250
16251 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
16252 void
16253 Inst_VOP3__V_CMP_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
16254 {
16255 panicUnimplemented();
16256 }
16257
16258 Inst_VOP3__V_CMP_NEQ_F16::Inst_VOP3__V_CMP_NEQ_F16(
16259 InFmt_VOP3 *iFmt)
16260 : Inst_VOP3(iFmt, "v_cmp_neq_f16", true)
16261 {
16262 setFlag(ALU);
16263 setFlag(F16);
16264 } // Inst_VOP3__V_CMP_NEQ_F16
16265
16266 Inst_VOP3__V_CMP_NEQ_F16::~Inst_VOP3__V_CMP_NEQ_F16()
16267 {
16268 } // ~Inst_VOP3__V_CMP_NEQ_F16
16269
16270 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
16271 void
16272 Inst_VOP3__V_CMP_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
16273 {
16274 panicUnimplemented();
16275 }
16276
16277 Inst_VOP3__V_CMP_NLT_F16::Inst_VOP3__V_CMP_NLT_F16(
16278 InFmt_VOP3 *iFmt)
16279 : Inst_VOP3(iFmt, "v_cmp_nlt_f16", true)
16280 {
16281 setFlag(ALU);
16282 setFlag(F16);
16283 } // Inst_VOP3__V_CMP_NLT_F16
16284
16285 Inst_VOP3__V_CMP_NLT_F16::~Inst_VOP3__V_CMP_NLT_F16()
16286 {
16287 } // ~Inst_VOP3__V_CMP_NLT_F16
16288
16289 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
16290 void
16291 Inst_VOP3__V_CMP_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
16292 {
16293 panicUnimplemented();
16294 }
16295
16296 Inst_VOP3__V_CMP_TRU_F16::Inst_VOP3__V_CMP_TRU_F16(
16297 InFmt_VOP3 *iFmt)
16298 : Inst_VOP3(iFmt, "v_cmp_tru_f16", true)
16299 {
16300 setFlag(ALU);
16301 setFlag(F16);
16302 } // Inst_VOP3__V_CMP_TRU_F16
16303
16304 Inst_VOP3__V_CMP_TRU_F16::~Inst_VOP3__V_CMP_TRU_F16()
16305 {
16306 } // ~Inst_VOP3__V_CMP_TRU_F16
16307
16308 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
16309 void
16310 Inst_VOP3__V_CMP_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
16311 {
16312 Wavefront *wf = gpuDynInst->wavefront();
16313 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16314
16315 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16316 if (wf->execMask(lane)) {
16317 sdst.setBit(lane, 1);
16318 }
16319 }
16320
16321 sdst.write();
16322 }
16323
16324 Inst_VOP3__V_CMPX_F_F16::Inst_VOP3__V_CMPX_F_F16(
16325 InFmt_VOP3 *iFmt)
16326 : Inst_VOP3(iFmt, "v_cmpx_f_f16", true)
16327 {
16328 setFlag(ALU);
setFlag(F16);
16329 } // Inst_VOP3__V_CMPX_F_F16
16330
16331 Inst_VOP3__V_CMPX_F_F16::~Inst_VOP3__V_CMPX_F_F16()
16332 {
16333 } // ~Inst_VOP3__V_CMPX_F_F16
16334
16335 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
16336 void
16337 Inst_VOP3__V_CMPX_F_F16::execute(GPUDynInstPtr gpuDynInst)
16338 {
16339 Wavefront *wf = gpuDynInst->wavefront();
16340 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16341
16342 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16343 if (wf->execMask(lane)) {
16344 sdst.setBit(lane, 0);
16345 }
16346 }
16347
16348 wf->execMask() = sdst.rawData();
16349 sdst.write();
16350 }
16351
16352 Inst_VOP3__V_CMPX_LT_F16::Inst_VOP3__V_CMPX_LT_F16(
16353 InFmt_VOP3 *iFmt)
16354 : Inst_VOP3(iFmt, "v_cmpx_lt_f16", true)
16355 {
16356 setFlag(ALU);
16357 setFlag(F16);
16358 } // Inst_VOP3__V_CMPX_LT_F16
16359
16360 Inst_VOP3__V_CMPX_LT_F16::~Inst_VOP3__V_CMPX_LT_F16()
16361 {
16362 } // ~Inst_VOP3__V_CMPX_LT_F16
16363
16364 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
16365 void
16366 Inst_VOP3__V_CMPX_LT_F16::execute(GPUDynInstPtr gpuDynInst)
16367 {
16368 panicUnimplemented();
16369 }
16370
16371 Inst_VOP3__V_CMPX_EQ_F16::Inst_VOP3__V_CMPX_EQ_F16(
16372 InFmt_VOP3 *iFmt)
16373 : Inst_VOP3(iFmt, "v_cmpx_eq_f16", true)
16374 {
16375 setFlag(ALU);
16376 setFlag(F16);
16377 } // Inst_VOP3__V_CMPX_EQ_F16
16378
16379 Inst_VOP3__V_CMPX_EQ_F16::~Inst_VOP3__V_CMPX_EQ_F16()
16380 {
16381 } // ~Inst_VOP3__V_CMPX_EQ_F16
16382
16383 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
16384 void
16385 Inst_VOP3__V_CMPX_EQ_F16::execute(GPUDynInstPtr gpuDynInst)
16386 {
16387 panicUnimplemented();
16388 }
16389
16390 Inst_VOP3__V_CMPX_LE_F16::Inst_VOP3__V_CMPX_LE_F16(
16391 InFmt_VOP3 *iFmt)
16392 : Inst_VOP3(iFmt, "v_cmpx_le_f16", true)
16393 {
16394 setFlag(ALU);
16395 setFlag(F16);
16396 } // Inst_VOP3__V_CMPX_LE_F16
16397
16398 Inst_VOP3__V_CMPX_LE_F16::~Inst_VOP3__V_CMPX_LE_F16()
16399 {
16400 } // ~Inst_VOP3__V_CMPX_LE_F16
16401
16402 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
16403 void
16404 Inst_VOP3__V_CMPX_LE_F16::execute(GPUDynInstPtr gpuDynInst)
16405 {
16406 panicUnimplemented();
16407 }
16408
16409 Inst_VOP3__V_CMPX_GT_F16::Inst_VOP3__V_CMPX_GT_F16(
16410 InFmt_VOP3 *iFmt)
16411 : Inst_VOP3(iFmt, "v_cmpx_gt_f16", true)
16412 {
16413 setFlag(ALU);
16414 setFlag(F16);
16415 } // Inst_VOP3__V_CMPX_GT_F16
16416
16417 Inst_VOP3__V_CMPX_GT_F16::~Inst_VOP3__V_CMPX_GT_F16()
16418 {
16419 } // ~Inst_VOP3__V_CMPX_GT_F16
16420
16421 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
16422 void
16423 Inst_VOP3__V_CMPX_GT_F16::execute(GPUDynInstPtr gpuDynInst)
16424 {
16425 panicUnimplemented();
16426 }
16427
16428 Inst_VOP3__V_CMPX_LG_F16::Inst_VOP3__V_CMPX_LG_F16(
16429 InFmt_VOP3 *iFmt)
16430 : Inst_VOP3(iFmt, "v_cmpx_lg_f16", true)
16431 {
16432 setFlag(ALU);
16433 setFlag(F16);
16434 } // Inst_VOP3__V_CMPX_LG_F16
16435
16436 Inst_VOP3__V_CMPX_LG_F16::~Inst_VOP3__V_CMPX_LG_F16()
16437 {
16438 } // ~Inst_VOP3__V_CMPX_LG_F16
16439
16440 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
16441 void
16442 Inst_VOP3__V_CMPX_LG_F16::execute(GPUDynInstPtr gpuDynInst)
16443 {
16444 panicUnimplemented();
16445 }
16446
16447 Inst_VOP3__V_CMPX_GE_F16::Inst_VOP3__V_CMPX_GE_F16(
16448 InFmt_VOP3 *iFmt)
16449 : Inst_VOP3(iFmt, "v_cmpx_ge_f16", true)
16450 {
16451 setFlag(ALU);
16452 setFlag(F16);
16453 } // Inst_VOP3__V_CMPX_GE_F16
16454
16455 Inst_VOP3__V_CMPX_GE_F16::~Inst_VOP3__V_CMPX_GE_F16()
16456 {
16457 } // ~Inst_VOP3__V_CMPX_GE_F16
16458
16459 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
16460 void
16461 Inst_VOP3__V_CMPX_GE_F16::execute(GPUDynInstPtr gpuDynInst)
16462 {
16463 panicUnimplemented();
16464 }
16465
16466 Inst_VOP3__V_CMPX_O_F16::Inst_VOP3__V_CMPX_O_F16(
16467 InFmt_VOP3 *iFmt)
16468 : Inst_VOP3(iFmt, "v_cmpx_o_f16", true)
16469 {
16470 setFlag(ALU);
16471 setFlag(F16);
16472 } // Inst_VOP3__V_CMPX_O_F16
16473
16474 Inst_VOP3__V_CMPX_O_F16::~Inst_VOP3__V_CMPX_O_F16()
16475 {
16476 } // ~Inst_VOP3__V_CMPX_O_F16
16477
16478 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
16479 // encoding.
16480 void
16481 Inst_VOP3__V_CMPX_O_F16::execute(GPUDynInstPtr gpuDynInst)
16482 {
16483 panicUnimplemented();
16484 }
16485
16486 Inst_VOP3__V_CMPX_U_F16::Inst_VOP3__V_CMPX_U_F16(
16487 InFmt_VOP3 *iFmt)
16488 : Inst_VOP3(iFmt, "v_cmpx_u_f16", true)
16489 {
16490 setFlag(ALU);
16491 setFlag(F16);
16492 } // Inst_VOP3__V_CMPX_U_F16
16493
16494 Inst_VOP3__V_CMPX_U_F16::~Inst_VOP3__V_CMPX_U_F16()
16495 {
16496 } // ~Inst_VOP3__V_CMPX_U_F16
16497
16498 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
16499 // encoding.
16500 void
16501 Inst_VOP3__V_CMPX_U_F16::execute(GPUDynInstPtr gpuDynInst)
16502 {
16503 panicUnimplemented();
16504 }
16505
16506 Inst_VOP3__V_CMPX_NGE_F16::Inst_VOP3__V_CMPX_NGE_F16(
16507 InFmt_VOP3 *iFmt)
16508 : Inst_VOP3(iFmt, "v_cmpx_nge_f16", true)
16509 {
16510 setFlag(ALU);
16511 setFlag(F16);
16512 } // Inst_VOP3__V_CMPX_NGE_F16
16513
16514 Inst_VOP3__V_CMPX_NGE_F16::~Inst_VOP3__V_CMPX_NGE_F16()
16515 {
16516 } // ~Inst_VOP3__V_CMPX_NGE_F16
16517
16518 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
16519 void
16520 Inst_VOP3__V_CMPX_NGE_F16::execute(GPUDynInstPtr gpuDynInst)
16521 {
16522 panicUnimplemented();
16523 }
16524
16525 Inst_VOP3__V_CMPX_NLG_F16::Inst_VOP3__V_CMPX_NLG_F16(
16526 InFmt_VOP3 *iFmt)
16527 : Inst_VOP3(iFmt, "v_cmpx_nlg_f16", true)
16528 {
16529 setFlag(ALU);
16530 setFlag(F16);
16531 } // Inst_VOP3__V_CMPX_NLG_F16
16532
16533 Inst_VOP3__V_CMPX_NLG_F16::~Inst_VOP3__V_CMPX_NLG_F16()
16534 {
16535 } // ~Inst_VOP3__V_CMPX_NLG_F16
16536
16537 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
16538 void
16539 Inst_VOP3__V_CMPX_NLG_F16::execute(GPUDynInstPtr gpuDynInst)
16540 {
16541 panicUnimplemented();
16542 }
16543
16544 Inst_VOP3__V_CMPX_NGT_F16::Inst_VOP3__V_CMPX_NGT_F16(
16545 InFmt_VOP3 *iFmt)
16546 : Inst_VOP3(iFmt, "v_cmpx_ngt_f16", true)
16547 {
16548 setFlag(ALU);
16549 setFlag(F16);
16550 } // Inst_VOP3__V_CMPX_NGT_F16
16551
16552 Inst_VOP3__V_CMPX_NGT_F16::~Inst_VOP3__V_CMPX_NGT_F16()
16553 {
16554 } // ~Inst_VOP3__V_CMPX_NGT_F16
16555
16556 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
16557 void
16558 Inst_VOP3__V_CMPX_NGT_F16::execute(GPUDynInstPtr gpuDynInst)
16559 {
16560 panicUnimplemented();
16561 }
16562
16563 Inst_VOP3__V_CMPX_NLE_F16::Inst_VOP3__V_CMPX_NLE_F16(
16564 InFmt_VOP3 *iFmt)
16565 : Inst_VOP3(iFmt, "v_cmpx_nle_f16", true)
16566 {
16567 setFlag(ALU);
16568 setFlag(F16);
16569 } // Inst_VOP3__V_CMPX_NLE_F16
16570
16571 Inst_VOP3__V_CMPX_NLE_F16::~Inst_VOP3__V_CMPX_NLE_F16()
16572 {
16573 } // ~Inst_VOP3__V_CMPX_NLE_F16
16574
16575 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
16576 void
16577 Inst_VOP3__V_CMPX_NLE_F16::execute(GPUDynInstPtr gpuDynInst)
16578 {
16579 panicUnimplemented();
16580 }
16581
16582 Inst_VOP3__V_CMPX_NEQ_F16::Inst_VOP3__V_CMPX_NEQ_F16(
16583 InFmt_VOP3 *iFmt)
16584 : Inst_VOP3(iFmt, "v_cmpx_neq_f16", true)
16585 {
16586 setFlag(ALU);
16587 setFlag(F16);
16588 } // Inst_VOP3__V_CMPX_NEQ_F16
16589
16590 Inst_VOP3__V_CMPX_NEQ_F16::~Inst_VOP3__V_CMPX_NEQ_F16()
16591 {
16592 } // ~Inst_VOP3__V_CMPX_NEQ_F16
16593
16594 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
16595 void
16596 Inst_VOP3__V_CMPX_NEQ_F16::execute(GPUDynInstPtr gpuDynInst)
16597 {
16598 panicUnimplemented();
16599 }
16600
16601 Inst_VOP3__V_CMPX_NLT_F16::Inst_VOP3__V_CMPX_NLT_F16(
16602 InFmt_VOP3 *iFmt)
16603 : Inst_VOP3(iFmt, "v_cmpx_nlt_f16", true)
16604 {
16605 setFlag(ALU);
16606 setFlag(F16);
16607 } // Inst_VOP3__V_CMPX_NLT_F16
16608
16609 Inst_VOP3__V_CMPX_NLT_F16::~Inst_VOP3__V_CMPX_NLT_F16()
16610 {
16611 } // ~Inst_VOP3__V_CMPX_NLT_F16
16612
16613 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
16614 void
16615 Inst_VOP3__V_CMPX_NLT_F16::execute(GPUDynInstPtr gpuDynInst)
16616 {
16617 panicUnimplemented();
16618 }
16619
16620 Inst_VOP3__V_CMPX_TRU_F16::Inst_VOP3__V_CMPX_TRU_F16(
16621 InFmt_VOP3 *iFmt)
16622 : Inst_VOP3(iFmt, "v_cmpx_tru_f16", true)
16623 {
16624 setFlag(ALU);
16625 setFlag(F16);
16626 } // Inst_VOP3__V_CMPX_TRU_F16
16627
16628 Inst_VOP3__V_CMPX_TRU_F16::~Inst_VOP3__V_CMPX_TRU_F16()
16629 {
16630 } // ~Inst_VOP3__V_CMPX_TRU_F16
16631
16632 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
16633 void
16634 Inst_VOP3__V_CMPX_TRU_F16::execute(GPUDynInstPtr gpuDynInst)
16635 {
16636 Wavefront *wf = gpuDynInst->wavefront();
16637 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16638
16639 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16640 if (wf->execMask(lane)) {
16641 sdst.setBit(lane, 1);
16642 }
16643 }
16644
16645 wf->execMask() = sdst.rawData();
16646 sdst.write();
16647 }
16648
16649 Inst_VOP3__V_CMP_F_F32::Inst_VOP3__V_CMP_F_F32(InFmt_VOP3 *iFmt)
16650 : Inst_VOP3(iFmt, "v_cmp_f_f32", true)
16651 {
16652 setFlag(ALU);
16653 setFlag(F32);
16654 } // Inst_VOP3__V_CMP_F_F32
16655
16656 Inst_VOP3__V_CMP_F_F32::~Inst_VOP3__V_CMP_F_F32()
16657 {
16658 } // ~Inst_VOP3__V_CMP_F_F32
16659
16660 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
16661 void
16662 Inst_VOP3__V_CMP_F_F32::execute(GPUDynInstPtr gpuDynInst)
16663 {
16664 Wavefront *wf = gpuDynInst->wavefront();
16665 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16666
16667 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16668 if (wf->execMask(lane)) {
16669 sdst.setBit(lane, 0);
16670 }
16671 }
16672
16673 sdst.write();
16674 }
16675
16676 Inst_VOP3__V_CMP_LT_F32::Inst_VOP3__V_CMP_LT_F32(
16677 InFmt_VOP3 *iFmt)
16678 : Inst_VOP3(iFmt, "v_cmp_lt_f32", true)
16679 {
16680 setFlag(ALU);
16681 setFlag(F32);
16682 } // Inst_VOP3__V_CMP_LT_F32
16683
16684 Inst_VOP3__V_CMP_LT_F32::~Inst_VOP3__V_CMP_LT_F32()
16685 {
16686 } // ~Inst_VOP3__V_CMP_LT_F32
16687
16688 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
16689 void
16690 Inst_VOP3__V_CMP_LT_F32::execute(GPUDynInstPtr gpuDynInst)
16691 {
16692 Wavefront *wf = gpuDynInst->wavefront();
16693 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16694 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16695 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16696
16697 src0.readSrc();
16698 src1.readSrc();
16699
16700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16701 if (wf->execMask(lane)) {
16702 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
16703 }
16704 }
16705
16706 sdst.write();
16707 }
16708
16709 Inst_VOP3__V_CMP_EQ_F32::Inst_VOP3__V_CMP_EQ_F32(
16710 InFmt_VOP3 *iFmt)
16711 : Inst_VOP3(iFmt, "v_cmp_eq_f32", true)
16712 {
16713 setFlag(ALU);
16714 setFlag(F32);
16715 } // Inst_VOP3__V_CMP_EQ_F32
16716
16717 Inst_VOP3__V_CMP_EQ_F32::~Inst_VOP3__V_CMP_EQ_F32()
16718 {
16719 } // ~Inst_VOP3__V_CMP_EQ_F32
16720
16721 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
16722 void
16723 Inst_VOP3__V_CMP_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
16724 {
16725 Wavefront *wf = gpuDynInst->wavefront();
16726 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16727 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16728 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16729
16730 src0.readSrc();
16731 src1.readSrc();
16732
16733 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16734 if (wf->execMask(lane)) {
16735 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
16736 }
16737 }
16738
16739 sdst.write();
16740 }
16741
16742 Inst_VOP3__V_CMP_LE_F32::Inst_VOP3__V_CMP_LE_F32(
16743 InFmt_VOP3 *iFmt)
16744 : Inst_VOP3(iFmt, "v_cmp_le_f32", true)
16745 {
16746 setFlag(ALU);
16747 setFlag(F32);
16748 } // Inst_VOP3__V_CMP_LE_F32
16749
16750 Inst_VOP3__V_CMP_LE_F32::~Inst_VOP3__V_CMP_LE_F32()
16751 {
16752 } // ~Inst_VOP3__V_CMP_LE_F32
16753
16754 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
16755 void
16756 Inst_VOP3__V_CMP_LE_F32::execute(GPUDynInstPtr gpuDynInst)
16757 {
16758 Wavefront *wf = gpuDynInst->wavefront();
16759 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16760 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16761 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16762
16763 src0.readSrc();
16764 src1.readSrc();
16765
16766 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16767 if (wf->execMask(lane)) {
16768 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
16769 }
16770 }
16771
16772 sdst.write();
16773 }
16774
16775 Inst_VOP3__V_CMP_GT_F32::Inst_VOP3__V_CMP_GT_F32(
16776 InFmt_VOP3 *iFmt)
16777 : Inst_VOP3(iFmt, "v_cmp_gt_f32", true)
16778 {
16779 setFlag(ALU);
16780 setFlag(F32);
16781 } // Inst_VOP3__V_CMP_GT_F32
16782
16783 Inst_VOP3__V_CMP_GT_F32::~Inst_VOP3__V_CMP_GT_F32()
16784 {
16785 } // ~Inst_VOP3__V_CMP_GT_F32
16786
16787 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
16788 void
16789 Inst_VOP3__V_CMP_GT_F32::execute(GPUDynInstPtr gpuDynInst)
16790 {
16791 Wavefront *wf = gpuDynInst->wavefront();
16792 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16793 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16794 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16795
16796 src0.readSrc();
16797 src1.readSrc();
16798
16799 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16800 if (wf->execMask(lane)) {
16801 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
16802 }
16803 }
16804
16805 sdst.write();
16806 }
16807
16808 Inst_VOP3__V_CMP_LG_F32::Inst_VOP3__V_CMP_LG_F32(
16809 InFmt_VOP3 *iFmt)
16810 : Inst_VOP3(iFmt, "v_cmp_lg_f32", true)
16811 {
16812 setFlag(ALU);
16813 setFlag(F32);
16814 } // Inst_VOP3__V_CMP_LG_F32
16815
16816 Inst_VOP3__V_CMP_LG_F32::~Inst_VOP3__V_CMP_LG_F32()
16817 {
16818 } // ~Inst_VOP3__V_CMP_LG_F32
16819
16820 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
16821 void
16822 Inst_VOP3__V_CMP_LG_F32::execute(GPUDynInstPtr gpuDynInst)
16823 {
16824 Wavefront *wf = gpuDynInst->wavefront();
16825 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16826 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16827 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16828
16829 src0.readSrc();
16830 src1.readSrc();
16831
16832 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16833 if (wf->execMask(lane)) {
16834 // LG ("<>") is an ordered compare: unlike operator!=, it is
// false when either operand is NaN, matching V_CMPX_LG_F32
// and V_CMP_LG_F64 below.
sdst.setBit(lane, (src0[lane] < src1[lane]
|| src0[lane] > src1[lane]) ? 1 : 0);
16835 }
16836 }
16837
16838 sdst.write();
16839 }
16840
16841 Inst_VOP3__V_CMP_GE_F32::Inst_VOP3__V_CMP_GE_F32(
16842 InFmt_VOP3 *iFmt)
16843 : Inst_VOP3(iFmt, "v_cmp_ge_f32", true)
16844 {
16845 setFlag(ALU);
16846 setFlag(F32);
16847 } // Inst_VOP3__V_CMP_GE_F32
16848
16849 Inst_VOP3__V_CMP_GE_F32::~Inst_VOP3__V_CMP_GE_F32()
16850 {
16851 } // ~Inst_VOP3__V_CMP_GE_F32
16852
16853 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
16854 void
16855 Inst_VOP3__V_CMP_GE_F32::execute(GPUDynInstPtr gpuDynInst)
16856 {
16857 Wavefront *wf = gpuDynInst->wavefront();
16858 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16859 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16860 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16861
16862 src0.readSrc();
16863 src1.readSrc();
16864
16865 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16866 if (wf->execMask(lane)) {
16867 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
16868 }
16869 }
16870
16871 sdst.write();
16872 }
16873
16874 Inst_VOP3__V_CMP_O_F32::Inst_VOP3__V_CMP_O_F32(InFmt_VOP3 *iFmt)
16875 : Inst_VOP3(iFmt, "v_cmp_o_f32", true)
16876 {
16877 setFlag(ALU);
16878 setFlag(F32);
16879 } // Inst_VOP3__V_CMP_O_F32
16880
16881 Inst_VOP3__V_CMP_O_F32::~Inst_VOP3__V_CMP_O_F32()
16882 {
16883 } // ~Inst_VOP3__V_CMP_O_F32
16884
16885 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
16886 void
16887 Inst_VOP3__V_CMP_O_F32::execute(GPUDynInstPtr gpuDynInst)
16888 {
16889 Wavefront *wf = gpuDynInst->wavefront();
16890 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16891 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16892 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16893
16894 src0.readSrc();
16895 src1.readSrc();
16896
16897 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16898 if (wf->execMask(lane)) {
16899 sdst.setBit(lane, (!std::isnan(src0[lane])
16900 && !std::isnan(src1[lane])) ? 1 : 0);
16901 }
16902 }
16903
16904 sdst.write();
16905 }
16906
16907 Inst_VOP3__V_CMP_U_F32::Inst_VOP3__V_CMP_U_F32(InFmt_VOP3 *iFmt)
16908 : Inst_VOP3(iFmt, "v_cmp_u_f32", true)
16909 {
16910 setFlag(ALU);
16911 setFlag(F32);
16912 } // Inst_VOP3__V_CMP_U_F32
16913
16914 Inst_VOP3__V_CMP_U_F32::~Inst_VOP3__V_CMP_U_F32()
16915 {
16916 } // ~Inst_VOP3__V_CMP_U_F32
16917
16918 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
16919 void
16920 Inst_VOP3__V_CMP_U_F32::execute(GPUDynInstPtr gpuDynInst)
16921 {
16922 Wavefront *wf = gpuDynInst->wavefront();
16923 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16924 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16925 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16926
16927 src0.readSrc();
16928 src1.readSrc();
16929
16930 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16931 if (wf->execMask(lane)) {
16932 sdst.setBit(lane, (std::isnan(src0[lane])
16933 || std::isnan(src1[lane])) ? 1 : 0);
16934 }
16935 }
16936
16937 sdst.write();
16938 }
16939
16940 Inst_VOP3__V_CMP_NGE_F32::Inst_VOP3__V_CMP_NGE_F32(
16941 InFmt_VOP3 *iFmt)
16942 : Inst_VOP3(iFmt, "v_cmp_nge_f32", true)
16943 {
16944 setFlag(ALU);
16945 setFlag(F32);
16946 } // Inst_VOP3__V_CMP_NGE_F32
16947
16948 Inst_VOP3__V_CMP_NGE_F32::~Inst_VOP3__V_CMP_NGE_F32()
16949 {
16950 } // ~Inst_VOP3__V_CMP_NGE_F32
16951
16952 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
16953 void
16954 Inst_VOP3__V_CMP_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
16955 {
16956 Wavefront *wf = gpuDynInst->wavefront();
16957 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16958 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16959 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16960
16961 src0.readSrc();
16962 src1.readSrc();
16963
16964 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16965 if (wf->execMask(lane)) {
16966 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
16967 }
16968 }
16969
16970 sdst.write();
16971 }
16972
16973 Inst_VOP3__V_CMP_NLG_F32::Inst_VOP3__V_CMP_NLG_F32(
16974 InFmt_VOP3 *iFmt)
16975 : Inst_VOP3(iFmt, "v_cmp_nlg_f32", true)
16976 {
16977 setFlag(ALU);
16978 setFlag(F32);
16979 } // Inst_VOP3__V_CMP_NLG_F32
16980
16981 Inst_VOP3__V_CMP_NLG_F32::~Inst_VOP3__V_CMP_NLG_F32()
16982 {
16983 } // ~Inst_VOP3__V_CMP_NLG_F32
16984
16985 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
16986 void
16987 Inst_VOP3__V_CMP_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
16988 {
16989 Wavefront *wf = gpuDynInst->wavefront();
16990 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
16991 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
16992 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
16993
16994 src0.readSrc();
16995 src1.readSrc();
16996
16997 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
16998 if (wf->execMask(lane)) {
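// NLG inverts the ordered LG compare, so it is also true
// when the operands are unordered (either one is NaN).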
16999 sdst.setBit(lane, !(src0[lane] < src1[lane]
17000 || src0[lane] > src1[lane]) ? 1 : 0);
17001 }
17002 }
17003
17004 sdst.write();
17005 }
17006
17007 Inst_VOP3__V_CMP_NGT_F32::Inst_VOP3__V_CMP_NGT_F32(
17008 InFmt_VOP3 *iFmt)
17009 : Inst_VOP3(iFmt, "v_cmp_ngt_f32", true)
17010 {
17011 setFlag(ALU);
17012 setFlag(F32);
17013 } // Inst_VOP3__V_CMP_NGT_F32
17014
17015 Inst_VOP3__V_CMP_NGT_F32::~Inst_VOP3__V_CMP_NGT_F32()
17016 {
17017 } // ~Inst_VOP3__V_CMP_NGT_F32
17018
17019 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
17020 void
17021 Inst_VOP3__V_CMP_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
17022 {
17023 Wavefront *wf = gpuDynInst->wavefront();
17024 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17025 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17026 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17027
17028 src0.readSrc();
17029 src1.readSrc();
17030
17031 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17032 if (wf->execMask(lane)) {
17033 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
17034 }
17035 }
17036
17037 sdst.write();
17038 }
17039
17040 Inst_VOP3__V_CMP_NLE_F32::Inst_VOP3__V_CMP_NLE_F32(
17041 InFmt_VOP3 *iFmt)
17042 : Inst_VOP3(iFmt, "v_cmp_nle_f32", true)
17043 {
17044 setFlag(ALU);
17045 setFlag(F32);
17046 } // Inst_VOP3__V_CMP_NLE_F32
17047
17048 Inst_VOP3__V_CMP_NLE_F32::~Inst_VOP3__V_CMP_NLE_F32()
17049 {
17050 } // ~Inst_VOP3__V_CMP_NLE_F32
17051
17052 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
17053 void
17054 Inst_VOP3__V_CMP_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
17055 {
17056 Wavefront *wf = gpuDynInst->wavefront();
17057 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17058 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17059 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17060
17061 src0.readSrc();
17062 src1.readSrc();
17063
17064 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17065 if (wf->execMask(lane)) {
17066 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
17067 }
17068 }
17069
17070 sdst.write();
17071 }
17072
17073 Inst_VOP3__V_CMP_NEQ_F32::Inst_VOP3__V_CMP_NEQ_F32(
17074 InFmt_VOP3 *iFmt)
17075 : Inst_VOP3(iFmt, "v_cmp_neq_f32", true)
17076 {
17077 setFlag(ALU);
17078 setFlag(F32);
17079 } // Inst_VOP3__V_CMP_NEQ_F32
17080
17081 Inst_VOP3__V_CMP_NEQ_F32::~Inst_VOP3__V_CMP_NEQ_F32()
17082 {
17083 } // ~Inst_VOP3__V_CMP_NEQ_F32
17084
17085 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
17086 void
17087 Inst_VOP3__V_CMP_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
17088 {
17089 Wavefront *wf = gpuDynInst->wavefront();
17090 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17091 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17092 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17093
17094 src0.readSrc();
17095 src1.readSrc();
17096
17097 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17098 if (wf->execMask(lane)) {
17099 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
17100 }
17101 }
17102
17103 sdst.write();
17104 }
17105
17106 Inst_VOP3__V_CMP_NLT_F32::Inst_VOP3__V_CMP_NLT_F32(
17107 InFmt_VOP3 *iFmt)
17108 : Inst_VOP3(iFmt, "v_cmp_nlt_f32", true)
17109 {
17110 setFlag(ALU);
17111 setFlag(F32);
17112 } // Inst_VOP3__V_CMP_NLT_F32
17113
17114 Inst_VOP3__V_CMP_NLT_F32::~Inst_VOP3__V_CMP_NLT_F32()
17115 {
17116 } // ~Inst_VOP3__V_CMP_NLT_F32
17117
17118 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
17119 void
17120 Inst_VOP3__V_CMP_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
17121 {
17122 Wavefront *wf = gpuDynInst->wavefront();
17123 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17124 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17125 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17126
17127 src0.readSrc();
17128 src1.readSrc();
17129
17130 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17131 if (wf->execMask(lane)) {
17132 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
17133 }
17134 }
17135
17136 sdst.write();
17137 }
17138
17139 Inst_VOP3__V_CMP_TRU_F32::Inst_VOP3__V_CMP_TRU_F32(
17140 InFmt_VOP3 *iFmt)
17141 : Inst_VOP3(iFmt, "v_cmp_tru_f32", true)
17142 {
17143 setFlag(ALU);
17144 setFlag(F32);
17145 } // Inst_VOP3__V_CMP_TRU_F32
17146
17147 Inst_VOP3__V_CMP_TRU_F32::~Inst_VOP3__V_CMP_TRU_F32()
17148 {
17149 } // ~Inst_VOP3__V_CMP_TRU_F32
17150
17151 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
17152 void
17153 Inst_VOP3__V_CMP_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
17154 {
17155 Wavefront *wf = gpuDynInst->wavefront();
17156 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17157
17158 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17159 if (wf->execMask(lane)) {
17160 sdst.setBit(lane, 1);
17161 }
17162 }
17163
17164 sdst.write();
17165 }
17166
17167 Inst_VOP3__V_CMPX_F_F32::Inst_VOP3__V_CMPX_F_F32(
17168 InFmt_VOP3 *iFmt)
17169 : Inst_VOP3(iFmt, "v_cmpx_f_f32", true)
17170 {
17171 setFlag(ALU);
17172 setFlag(F32);
17173 } // Inst_VOP3__V_CMPX_F_F32
17174
17175 Inst_VOP3__V_CMPX_F_F32::~Inst_VOP3__V_CMPX_F_F32()
17176 {
17177 } // ~Inst_VOP3__V_CMPX_F_F32
17178
17179 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
17180 void
17181 Inst_VOP3__V_CMPX_F_F32::execute(GPUDynInstPtr gpuDynInst)
17182 {
17183 Wavefront *wf = gpuDynInst->wavefront();
17184 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17185
17186 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17187 if (wf->execMask(lane)) {
17188 sdst.setBit(lane, 0);
17189 }
17190 }
17191
17192 wf->execMask() = sdst.rawData();
17193 sdst.write();
17194 }
17195
17196 Inst_VOP3__V_CMPX_LT_F32::Inst_VOP3__V_CMPX_LT_F32(
17197 InFmt_VOP3 *iFmt)
17198 : Inst_VOP3(iFmt, "v_cmpx_lt_f32", true)
17199 {
17200 setFlag(ALU);
17201 setFlag(F32);
17202 } // Inst_VOP3__V_CMPX_LT_F32
17203
17204 Inst_VOP3__V_CMPX_LT_F32::~Inst_VOP3__V_CMPX_LT_F32()
17205 {
17206 } // ~Inst_VOP3__V_CMPX_LT_F32
17207
17208 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
17209 void
17210 Inst_VOP3__V_CMPX_LT_F32::execute(GPUDynInstPtr gpuDynInst)
17211 {
17212 Wavefront *wf = gpuDynInst->wavefront();
17213 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17214 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17215 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17216
17217 src0.readSrc();
17218 src1.readSrc();
17219
17220 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17221 if (wf->execMask(lane)) {
17222 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
17223 }
17224 }
17225
17226 wf->execMask() = sdst.rawData();
17227 sdst.write();
17228 }
17229
17230 Inst_VOP3__V_CMPX_EQ_F32::Inst_VOP3__V_CMPX_EQ_F32(
17231 InFmt_VOP3 *iFmt)
17232 : Inst_VOP3(iFmt, "v_cmpx_eq_f32", true)
17233 {
17234 setFlag(ALU);
17235 setFlag(F32);
17236 } // Inst_VOP3__V_CMPX_EQ_F32
17237
17238 Inst_VOP3__V_CMPX_EQ_F32::~Inst_VOP3__V_CMPX_EQ_F32()
17239 {
17240 } // ~Inst_VOP3__V_CMPX_EQ_F32
17241
17242 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
17243 void
17244 Inst_VOP3__V_CMPX_EQ_F32::execute(GPUDynInstPtr gpuDynInst)
17245 {
17246 Wavefront *wf = gpuDynInst->wavefront();
17247 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17248 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17249 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17250
17251 src0.readSrc();
17252 src1.readSrc();
17253
17254 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17255 if (wf->execMask(lane)) {
17256 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
17257 }
17258 }
17259
17260 wf->execMask() = sdst.rawData();
17261 sdst.write();
17262 }
17263
17264 Inst_VOP3__V_CMPX_LE_F32::Inst_VOP3__V_CMPX_LE_F32(
17265 InFmt_VOP3 *iFmt)
17266 : Inst_VOP3(iFmt, "v_cmpx_le_f32", true)
17267 {
17268 setFlag(ALU);
17269 setFlag(F32);
17270 } // Inst_VOP3__V_CMPX_LE_F32
17271
17272 Inst_VOP3__V_CMPX_LE_F32::~Inst_VOP3__V_CMPX_LE_F32()
17273 {
17274 } // ~Inst_VOP3__V_CMPX_LE_F32
17275
17276 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
17277 void
17278 Inst_VOP3__V_CMPX_LE_F32::execute(GPUDynInstPtr gpuDynInst)
17279 {
17280 Wavefront *wf = gpuDynInst->wavefront();
17281 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17282 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17283 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17284
17285 src0.readSrc();
17286 src1.readSrc();
17287
17288 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17289 if (wf->execMask(lane)) {
17290 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
17291 }
17292 }
17293
17294 wf->execMask() = sdst.rawData();
17295 sdst.write();
17296 }
17297
17298 Inst_VOP3__V_CMPX_GT_F32::Inst_VOP3__V_CMPX_GT_F32(
17299 InFmt_VOP3 *iFmt)
17300 : Inst_VOP3(iFmt, "v_cmpx_gt_f32", true)
17301 {
17302 setFlag(ALU);
17303 setFlag(F32);
17304 } // Inst_VOP3__V_CMPX_GT_F32
17305
17306 Inst_VOP3__V_CMPX_GT_F32::~Inst_VOP3__V_CMPX_GT_F32()
17307 {
17308 } // ~Inst_VOP3__V_CMPX_GT_F32
17309
17310 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
17311 void
17312 Inst_VOP3__V_CMPX_GT_F32::execute(GPUDynInstPtr gpuDynInst)
17313 {
17314 Wavefront *wf = gpuDynInst->wavefront();
17315 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17316 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17317 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17318
17319 src0.readSrc();
17320 src1.readSrc();
17321
17322 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17323 if (wf->execMask(lane)) {
17324 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
17325 }
17326 }
17327
17328 wf->execMask() = sdst.rawData();
17329 sdst.write();
17330 }
17331
17332 Inst_VOP3__V_CMPX_LG_F32::Inst_VOP3__V_CMPX_LG_F32(
17333 InFmt_VOP3 *iFmt)
17334 : Inst_VOP3(iFmt, "v_cmpx_lg_f32", true)
17335 {
17336 setFlag(ALU);
17337 setFlag(F32);
17338 } // Inst_VOP3__V_CMPX_LG_F32
17339
17340 Inst_VOP3__V_CMPX_LG_F32::~Inst_VOP3__V_CMPX_LG_F32()
17341 {
17342 } // ~Inst_VOP3__V_CMPX_LG_F32
17343
17344 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
17345 void
17346 Inst_VOP3__V_CMPX_LG_F32::execute(GPUDynInstPtr gpuDynInst)
17347 {
17348 Wavefront *wf = gpuDynInst->wavefront();
17349 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17350 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17351 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17352
17353 src0.readSrc();
17354 src1.readSrc();
17355
17356 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17357 if (wf->execMask(lane)) {
17358 sdst.setBit(lane, (src0[lane] < src1[lane]
17359 || src0[lane] > src1[lane]) ? 1 : 0);
17360 }
17361 }
17362
17363 wf->execMask() = sdst.rawData();
17364 sdst.write();
17365 }
17366
17367 Inst_VOP3__V_CMPX_GE_F32::Inst_VOP3__V_CMPX_GE_F32(
17368 InFmt_VOP3 *iFmt)
17369 : Inst_VOP3(iFmt, "v_cmpx_ge_f32", true)
17370 {
17371 setFlag(ALU);
17372 setFlag(F32);
17373 } // Inst_VOP3__V_CMPX_GE_F32
17374
17375 Inst_VOP3__V_CMPX_GE_F32::~Inst_VOP3__V_CMPX_GE_F32()
17376 {
17377 } // ~Inst_VOP3__V_CMPX_GE_F32
17378
17379 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
17380 void
17381 Inst_VOP3__V_CMPX_GE_F32::execute(GPUDynInstPtr gpuDynInst)
17382 {
17383 Wavefront *wf = gpuDynInst->wavefront();
17384 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17385 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17386 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17387
17388 src0.readSrc();
17389 src1.readSrc();
17390
17391 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17392 if (wf->execMask(lane)) {
17393 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
17394 }
17395 }
17396
17397 wf->execMask() = sdst.rawData();
17398 sdst.write();
17399 }
17400
17401 Inst_VOP3__V_CMPX_O_F32::Inst_VOP3__V_CMPX_O_F32(
17402 InFmt_VOP3 *iFmt)
17403 : Inst_VOP3(iFmt, "v_cmpx_o_f32", true)
17404 {
17405 setFlag(ALU);
17406 setFlag(F32);
17407 } // Inst_VOP3__V_CMPX_O_F32
17408
17409 Inst_VOP3__V_CMPX_O_F32::~Inst_VOP3__V_CMPX_O_F32()
17410 {
17411 } // ~Inst_VOP3__V_CMPX_O_F32
17412
17413 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
17414 // encoding.
17415 void
17416 Inst_VOP3__V_CMPX_O_F32::execute(GPUDynInstPtr gpuDynInst)
17417 {
17418 Wavefront *wf = gpuDynInst->wavefront();
17419 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17420 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17421 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17422
17423 src0.readSrc();
17424 src1.readSrc();
17425
17426 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17427 if (wf->execMask(lane)) {
17428 sdst.setBit(lane, (!std::isnan(src0[lane])
17429 && !std::isnan(src1[lane])) ? 1 : 0);
17430 }
17431 }
17432
17433 wf->execMask() = sdst.rawData();
17434 sdst.write();
17435 }
17436
17437 Inst_VOP3__V_CMPX_U_F32::Inst_VOP3__V_CMPX_U_F32(
17438 InFmt_VOP3 *iFmt)
17439 : Inst_VOP3(iFmt, "v_cmpx_u_f32", true)
17440 {
17441 setFlag(ALU);
17442 setFlag(F32);
17443 } // Inst_VOP3__V_CMPX_U_F32
17444
17445 Inst_VOP3__V_CMPX_U_F32::~Inst_VOP3__V_CMPX_U_F32()
17446 {
17447 } // ~Inst_VOP3__V_CMPX_U_F32
17448
17449 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
17450 // encoding.
17451 void
17452 Inst_VOP3__V_CMPX_U_F32::execute(GPUDynInstPtr gpuDynInst)
17453 {
17454 Wavefront *wf = gpuDynInst->wavefront();
17455 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17456 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17457 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17458
17459 src0.readSrc();
17460 src1.readSrc();
17461
17462 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17463 if (wf->execMask(lane)) {
17464 sdst.setBit(lane, (std::isnan(src0[lane])
17465 || std::isnan(src1[lane])) ? 1 : 0);
17466 }
17467 }
17468
17469 wf->execMask() = sdst.rawData();
17470 sdst.write();
17471 }
17472
17473 Inst_VOP3__V_CMPX_NGE_F32::Inst_VOP3__V_CMPX_NGE_F32(
17474 InFmt_VOP3 *iFmt)
17475 : Inst_VOP3(iFmt, "v_cmpx_nge_f32", true)
17476 {
17477 setFlag(ALU);
17478 setFlag(F32);
17479 } // Inst_VOP3__V_CMPX_NGE_F32
17480
17481 Inst_VOP3__V_CMPX_NGE_F32::~Inst_VOP3__V_CMPX_NGE_F32()
17482 {
17483 } // ~Inst_VOP3__V_CMPX_NGE_F32
17484
17485 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
17486 void
17487 Inst_VOP3__V_CMPX_NGE_F32::execute(GPUDynInstPtr gpuDynInst)
17488 {
17489 Wavefront *wf = gpuDynInst->wavefront();
17490 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17491 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17492 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17493
17494 src0.readSrc();
17495 src1.readSrc();
17496
17497 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17498 if (wf->execMask(lane)) {
17499 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
17500 }
17501 }
17502
17503 wf->execMask() = sdst.rawData();
17504 sdst.write();
17505 }
17506
17507 Inst_VOP3__V_CMPX_NLG_F32::Inst_VOP3__V_CMPX_NLG_F32(
17508 InFmt_VOP3 *iFmt)
17509 : Inst_VOP3(iFmt, "v_cmpx_nlg_f32", true)
17510 {
17511 setFlag(ALU);
17512 setFlag(F32);
17513 } // Inst_VOP3__V_CMPX_NLG_F32
17514
17515 Inst_VOP3__V_CMPX_NLG_F32::~Inst_VOP3__V_CMPX_NLG_F32()
17516 {
17517 } // ~Inst_VOP3__V_CMPX_NLG_F32
17518
17519 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
17520 void
17521 Inst_VOP3__V_CMPX_NLG_F32::execute(GPUDynInstPtr gpuDynInst)
17522 {
17523 Wavefront *wf = gpuDynInst->wavefront();
17524 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17525 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17526 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17527
17528 src0.readSrc();
17529 src1.readSrc();
17530
17531 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17532 if (wf->execMask(lane)) {
17533 sdst.setBit(lane, !(src0[lane] < src1[lane]
17534 || src0[lane] > src1[lane]) ? 1 : 0);
17535 }
17536 }
17537
17538 wf->execMask() = sdst.rawData();
17539 sdst.write();
17540 }
17541
17542 Inst_VOP3__V_CMPX_NGT_F32::Inst_VOP3__V_CMPX_NGT_F32(
17543 InFmt_VOP3 *iFmt)
17544 : Inst_VOP3(iFmt, "v_cmpx_ngt_f32", true)
17545 {
17546 setFlag(ALU);
17547 setFlag(F32);
17548 } // Inst_VOP3__V_CMPX_NGT_F32
17549
17550 Inst_VOP3__V_CMPX_NGT_F32::~Inst_VOP3__V_CMPX_NGT_F32()
17551 {
17552 } // ~Inst_VOP3__V_CMPX_NGT_F32
17553
17554 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
17555 void
17556 Inst_VOP3__V_CMPX_NGT_F32::execute(GPUDynInstPtr gpuDynInst)
17557 {
17558 Wavefront *wf = gpuDynInst->wavefront();
17559 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17560 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17561 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17562
17563 src0.readSrc();
17564 src1.readSrc();
17565
17566 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17567 if (wf->execMask(lane)) {
17568 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
17569 }
17570 }
17571
17572 wf->execMask() = sdst.rawData();
17573 sdst.write();
17574 }
17575
17576 Inst_VOP3__V_CMPX_NLE_F32::Inst_VOP3__V_CMPX_NLE_F32(
17577 InFmt_VOP3 *iFmt)
17578 : Inst_VOP3(iFmt, "v_cmpx_nle_f32", true)
17579 {
17580 setFlag(ALU);
17581 setFlag(F32);
17582 } // Inst_VOP3__V_CMPX_NLE_F32
17583
17584 Inst_VOP3__V_CMPX_NLE_F32::~Inst_VOP3__V_CMPX_NLE_F32()
17585 {
17586 } // ~Inst_VOP3__V_CMPX_NLE_F32
17587
17588 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
17589 void
17590 Inst_VOP3__V_CMPX_NLE_F32::execute(GPUDynInstPtr gpuDynInst)
17591 {
17592 Wavefront *wf = gpuDynInst->wavefront();
17593 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17594 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17595 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17596
17597 src0.readSrc();
17598 src1.readSrc();
17599
17600 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17601 if (wf->execMask(lane)) {
17602 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
17603 }
17604 }
17605
17606 wf->execMask() = sdst.rawData();
17607 sdst.write();
17608 }
17609
17610 Inst_VOP3__V_CMPX_NEQ_F32::Inst_VOP3__V_CMPX_NEQ_F32(
17611 InFmt_VOP3 *iFmt)
17612 : Inst_VOP3(iFmt, "v_cmpx_neq_f32", true)
17613 {
17614 setFlag(ALU);
17615 setFlag(F32);
17616 } // Inst_VOP3__V_CMPX_NEQ_F32
17617
17618 Inst_VOP3__V_CMPX_NEQ_F32::~Inst_VOP3__V_CMPX_NEQ_F32()
17619 {
17620 } // ~Inst_VOP3__V_CMPX_NEQ_F32
17621
17622 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
17623 void
17624 Inst_VOP3__V_CMPX_NEQ_F32::execute(GPUDynInstPtr gpuDynInst)
17625 {
17626 Wavefront *wf = gpuDynInst->wavefront();
17627 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17628 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17629 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17630
17631 src0.readSrc();
17632 src1.readSrc();
17633
17634 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17635 if (wf->execMask(lane)) {
17636 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
17637 }
17638 }
17639
17640 wf->execMask() = sdst.rawData();
17641 sdst.write();
17642 }
17643
17644 Inst_VOP3__V_CMPX_NLT_F32::Inst_VOP3__V_CMPX_NLT_F32(
17645 InFmt_VOP3 *iFmt)
17646 : Inst_VOP3(iFmt, "v_cmpx_nlt_f32", true)
17647 {
17648 setFlag(ALU);
17649 setFlag(F32);
17650 } // Inst_VOP3__V_CMPX_NLT_F32
17651
17652 Inst_VOP3__V_CMPX_NLT_F32::~Inst_VOP3__V_CMPX_NLT_F32()
17653 {
17654 } // ~Inst_VOP3__V_CMPX_NLT_F32
17655
17656 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
17657 void
17658 Inst_VOP3__V_CMPX_NLT_F32::execute(GPUDynInstPtr gpuDynInst)
17659 {
17660 Wavefront *wf = gpuDynInst->wavefront();
17661 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
17662 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
17663 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17664
17665 src0.readSrc();
17666 src1.readSrc();
17667
17668 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17669 if (wf->execMask(lane)) {
17670 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
17671 }
17672 }
17673
17674 wf->execMask() = sdst.rawData();
17675 sdst.write();
17676 }
17677
17678 Inst_VOP3__V_CMPX_TRU_F32::Inst_VOP3__V_CMPX_TRU_F32(
17679 InFmt_VOP3 *iFmt)
17680 : Inst_VOP3(iFmt, "v_cmpx_tru_f32", true)
17681 {
17682 setFlag(ALU);
17683 setFlag(F32);
17684 } // Inst_VOP3__V_CMPX_TRU_F32
17685
17686 Inst_VOP3__V_CMPX_TRU_F32::~Inst_VOP3__V_CMPX_TRU_F32()
17687 {
17688 } // ~Inst_VOP3__V_CMPX_TRU_F32
17689
17690 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
17691 void
17692 Inst_VOP3__V_CMPX_TRU_F32::execute(GPUDynInstPtr gpuDynInst)
17693 {
17694 Wavefront *wf = gpuDynInst->wavefront();
17695 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17696
17697 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17698 if (wf->execMask(lane)) {
17699 sdst.setBit(lane, 1);
17700 }
17701 }
17702
17703 wf->execMask() = sdst.rawData();
17704 sdst.write();
17705 }
17706
17707 Inst_VOP3__V_CMP_F_F64::Inst_VOP3__V_CMP_F_F64(InFmt_VOP3 *iFmt)
17708 : Inst_VOP3(iFmt, "v_cmp_f_f64", true)
17709 {
17710 setFlag(ALU);
17711 setFlag(F64);
17712 } // Inst_VOP3__V_CMP_F_F64
17713
17714 Inst_VOP3__V_CMP_F_F64::~Inst_VOP3__V_CMP_F_F64()
17715 {
17716 } // ~Inst_VOP3__V_CMP_F_F64
17717
17718 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
17719 void
17720 Inst_VOP3__V_CMP_F_F64::execute(GPUDynInstPtr gpuDynInst)
17721 {
17722 Wavefront *wf = gpuDynInst->wavefront();
17723 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17724
17725 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17726 if (wf->execMask(lane)) {
17727 sdst.setBit(lane, 0);
17728 }
17729 }
17730
17731 sdst.write();
17732 }
17733
17734 Inst_VOP3__V_CMP_LT_F64::Inst_VOP3__V_CMP_LT_F64(
17735 InFmt_VOP3 *iFmt)
17736 : Inst_VOP3(iFmt, "v_cmp_lt_f64", true)
17737 {
17738 setFlag(ALU);
17739 setFlag(F64);
17740 } // Inst_VOP3__V_CMP_LT_F64
17741
17742 Inst_VOP3__V_CMP_LT_F64::~Inst_VOP3__V_CMP_LT_F64()
17743 {
17744 } // ~Inst_VOP3__V_CMP_LT_F64
17745
17746 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
17747 void
17748 Inst_VOP3__V_CMP_LT_F64::execute(GPUDynInstPtr gpuDynInst)
17749 {
17750 Wavefront *wf = gpuDynInst->wavefront();
17751 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17752 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17753 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17754
17755 src0.readSrc();
17756 src1.readSrc();
17757
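// ABS/NEG bit 0 modifies src0 and bit 1 modifies src1; bit 2 would
// select a third source, which compares do not have (see the
// asserts below).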
17758 if (instData.ABS & 0x1) {
17759 src0.absModifier();
17760 }
17761
17762 if (instData.ABS & 0x2) {
17763 src1.absModifier();
17764 }
17765
17766 if (extData.NEG & 0x1) {
17767 src0.negModifier();
17768 }
17769
17770 if (extData.NEG & 0x2) {
17771 src1.negModifier();
17772 }
17773
17774 /**
17775 * input modifiers are supported by FP operations only
17776 */
17777 assert(!(instData.ABS & 0x4));
17778 assert(!(extData.NEG & 0x4));
17779
17780 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17781 if (wf->execMask(lane)) {
17782 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
17783 }
17784 }
17785
17786 sdst.write();
17787 }
17788
17789 Inst_VOP3__V_CMP_EQ_F64::Inst_VOP3__V_CMP_EQ_F64(
17790 InFmt_VOP3 *iFmt)
17791 : Inst_VOP3(iFmt, "v_cmp_eq_f64", true)
17792 {
17793 setFlag(ALU);
17794 setFlag(F64);
17795 } // Inst_VOP3__V_CMP_EQ_F64
17796
17797 Inst_VOP3__V_CMP_EQ_F64::~Inst_VOP3__V_CMP_EQ_F64()
17798 {
17799 } // ~Inst_VOP3__V_CMP_EQ_F64
17800
17801 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
17802 void
17803 Inst_VOP3__V_CMP_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
17804 {
17805 Wavefront *wf = gpuDynInst->wavefront();
17806 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17807 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17808 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17809
17810 src0.readSrc();
17811 src1.readSrc();
17812
17813 if (instData.ABS & 0x1) {
17814 src0.absModifier();
17815 }
17816
17817 if (instData.ABS & 0x2) {
17818 src1.absModifier();
17819 }
17820
17821 if (extData.NEG & 0x1) {
17822 src0.negModifier();
17823 }
17824
17825 if (extData.NEG & 0x2) {
17826 src1.negModifier();
17827 }
17828
17829 /**
17830 * input modifiers are supported by FP operations only
17831 */
17832 assert(!(instData.ABS & 0x4));
17833 assert(!(extData.NEG & 0x4));
17834
17835 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17836 if (wf->execMask(lane)) {
17837 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
17838 }
17839 }
17840
17841 sdst.write();
17842 }
17843
17844 Inst_VOP3__V_CMP_LE_F64::Inst_VOP3__V_CMP_LE_F64(
17845 InFmt_VOP3 *iFmt)
17846 : Inst_VOP3(iFmt, "v_cmp_le_f64", true)
17847 {
17848 setFlag(ALU);
17849 setFlag(F64);
17850 } // Inst_VOP3__V_CMP_LE_F64
17851
17852 Inst_VOP3__V_CMP_LE_F64::~Inst_VOP3__V_CMP_LE_F64()
17853 {
17854 } // ~Inst_VOP3__V_CMP_LE_F64
17855
17856 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
17857 void
17858 Inst_VOP3__V_CMP_LE_F64::execute(GPUDynInstPtr gpuDynInst)
17859 {
17860 Wavefront *wf = gpuDynInst->wavefront();
17861 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17862 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17863 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17864
17865 src0.readSrc();
17866 src1.readSrc();
17867
17868 if (instData.ABS & 0x1) {
17869 src0.absModifier();
17870 }
17871
17872 if (instData.ABS & 0x2) {
17873 src1.absModifier();
17874 }
17875
17876 if (extData.NEG & 0x1) {
17877 src0.negModifier();
17878 }
17879
17880 if (extData.NEG & 0x2) {
17881 src1.negModifier();
17882 }
17883
17884 /**
17885 * input modifiers are supported by FP operations only
17886 */
17887 assert(!(instData.ABS & 0x4));
17888 assert(!(extData.NEG & 0x4));
17889
17890 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17891 if (wf->execMask(lane)) {
17892 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
17893 }
17894 }
17895
17896 sdst.write();
17897 }
17898
17899 Inst_VOP3__V_CMP_GT_F64::Inst_VOP3__V_CMP_GT_F64(
17900 InFmt_VOP3 *iFmt)
17901 : Inst_VOP3(iFmt, "v_cmp_gt_f64", true)
17902 {
17903 setFlag(ALU);
17904 setFlag(F64);
17905 } // Inst_VOP3__V_CMP_GT_F64
17906
17907 Inst_VOP3__V_CMP_GT_F64::~Inst_VOP3__V_CMP_GT_F64()
17908 {
17909 } // ~Inst_VOP3__V_CMP_GT_F64
17910
17911 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
17912 void
17913 Inst_VOP3__V_CMP_GT_F64::execute(GPUDynInstPtr gpuDynInst)
17914 {
17915 Wavefront *wf = gpuDynInst->wavefront();
17916 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17917 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17918 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17919
17920 src0.readSrc();
17921 src1.readSrc();
17922
17923 if (instData.ABS & 0x1) {
17924 src0.absModifier();
17925 }
17926
17927 if (instData.ABS & 0x2) {
17928 src1.absModifier();
17929 }
17930
17931 if (extData.NEG & 0x1) {
17932 src0.negModifier();
17933 }
17934
17935 if (extData.NEG & 0x2) {
17936 src1.negModifier();
17937 }
17938
17939 /**
17940 * input modifiers are supported by FP operations only
17941 */
17942 assert(!(instData.ABS & 0x4));
17943 assert(!(extData.NEG & 0x4));
17944
17945 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
17946 if (wf->execMask(lane)) {
17947 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
17948 }
17949 }
17950
17951 sdst.write();
17952 }
17953
17954 Inst_VOP3__V_CMP_LG_F64::Inst_VOP3__V_CMP_LG_F64(
17955 InFmt_VOP3 *iFmt)
17956 : Inst_VOP3(iFmt, "v_cmp_lg_f64", true)
17957 {
17958 setFlag(ALU);
17959 setFlag(F64);
17960 } // Inst_VOP3__V_CMP_LG_F64
17961
17962 Inst_VOP3__V_CMP_LG_F64::~Inst_VOP3__V_CMP_LG_F64()
17963 {
17964 } // ~Inst_VOP3__V_CMP_LG_F64
17965
17966 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
17967 void
17968 Inst_VOP3__V_CMP_LG_F64::execute(GPUDynInstPtr gpuDynInst)
17969 {
17970 Wavefront *wf = gpuDynInst->wavefront();
17971 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
17972 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
17973 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
17974
17975 src0.readSrc();
17976 src1.readSrc();
17977
17978 if (instData.ABS & 0x1) {
17979 src0.absModifier();
17980 }
17981
17982 if (instData.ABS & 0x2) {
17983 src1.absModifier();
17984 }
17985
17986 if (extData.NEG & 0x1) {
17987 src0.negModifier();
17988 }
17989
17990 if (extData.NEG & 0x2) {
17991 src1.negModifier();
17992 }
17993
17994 /**
17995 * input modifiers are supported by FP operations only
17996 */
17997 assert(!(instData.ABS & 0x4));
17998 assert(!(extData.NEG & 0x4));
17999
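            // LG ("less than or greater than") is the ordered not-equal:
            // (a < b || a > b) is false whenever either input is NaN,
            // whereas v_cmp_neq_f64 below uses != and yields true for
            // NaN inputs.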
18000 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18001 if (wf->execMask(lane)) {
18002 sdst.setBit(lane, (src0[lane] < src1[lane]
18003 || src0[lane] > src1[lane]) ? 1 : 0);
18004 }
18005 }
18006
18007 sdst.write();
18008 }
18009
18010 Inst_VOP3__V_CMP_GE_F64::Inst_VOP3__V_CMP_GE_F64(
18011 InFmt_VOP3 *iFmt)
18012 : Inst_VOP3(iFmt, "v_cmp_ge_f64", true)
18013 {
18014 setFlag(ALU);
18015 setFlag(F64);
18016 } // Inst_VOP3__V_CMP_GE_F64
18017
18018 Inst_VOP3__V_CMP_GE_F64::~Inst_VOP3__V_CMP_GE_F64()
18019 {
18020 } // ~Inst_VOP3__V_CMP_GE_F64
18021
18022 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
18023 void
18024 Inst_VOP3__V_CMP_GE_F64::execute(GPUDynInstPtr gpuDynInst)
18025 {
18026 Wavefront *wf = gpuDynInst->wavefront();
18027 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18028 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18029 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18030
18031 src0.readSrc();
18032 src1.readSrc();
18033
18034 if (instData.ABS & 0x1) {
18035 src0.absModifier();
18036 }
18037
18038 if (instData.ABS & 0x2) {
18039 src1.absModifier();
18040 }
18041
18042 if (extData.NEG & 0x1) {
18043 src0.negModifier();
18044 }
18045
18046 if (extData.NEG & 0x2) {
18047 src1.negModifier();
18048 }
18049
18050 /**
18051 * input modifiers are supported by FP operations only
18052 */
18053 assert(!(instData.ABS & 0x4));
18054 assert(!(extData.NEG & 0x4));
18055
18056 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18057 if (wf->execMask(lane)) {
18058 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
18059 }
18060 }
18061
18062 sdst.write();
18063 }
18064
18065 Inst_VOP3__V_CMP_O_F64::Inst_VOP3__V_CMP_O_F64(InFmt_VOP3 *iFmt)
18066 : Inst_VOP3(iFmt, "v_cmp_o_f64", true)
18067 {
18068 setFlag(ALU);
18069 setFlag(F64);
18070 } // Inst_VOP3__V_CMP_O_F64
18071
18072 Inst_VOP3__V_CMP_O_F64::~Inst_VOP3__V_CMP_O_F64()
18073 {
18074 } // ~Inst_VOP3__V_CMP_O_F64
18075
18076 // D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC encoding.
18077 void
18078 Inst_VOP3__V_CMP_O_F64::execute(GPUDynInstPtr gpuDynInst)
18079 {
18080 Wavefront *wf = gpuDynInst->wavefront();
18081 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18082 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18083 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18084
18085 src0.readSrc();
18086 src1.readSrc();
18087
18088 if (instData.ABS & 0x1) {
18089 src0.absModifier();
18090 }
18091
18092 if (instData.ABS & 0x2) {
18093 src1.absModifier();
18094 }
18095
18096 if (extData.NEG & 0x1) {
18097 src0.negModifier();
18098 }
18099
18100 if (extData.NEG & 0x2) {
18101 src1.negModifier();
18102 }
18103
18104 /**
18105 * input modifiers are supported by FP operations only
18106 */
18107 assert(!(instData.ABS & 0x4));
18108 assert(!(extData.NEG & 0x4));
18109
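            // "Ordered" holds only when neither input is NaN;
            // v_cmp_u_f64 below computes the complementary "unordered"
            // predicate, so any pair of inputs satisfies exactly one of
            // the two.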
18110 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18111 if (wf->execMask(lane)) {
18112 sdst.setBit(lane, (!std::isnan(src0[lane])
18113 && !std::isnan(src1[lane])) ? 1 : 0);
18114 }
18115 }
18116
18117 sdst.write();
18118 }
18119
18120 Inst_VOP3__V_CMP_U_F64::Inst_VOP3__V_CMP_U_F64(InFmt_VOP3 *iFmt)
18121 : Inst_VOP3(iFmt, "v_cmp_u_f64", true)
18122 {
18123 setFlag(ALU);
18124 setFlag(F64);
18125 } // Inst_VOP3__V_CMP_U_F64
18126
18127 Inst_VOP3__V_CMP_U_F64::~Inst_VOP3__V_CMP_U_F64()
18128 {
18129 } // ~Inst_VOP3__V_CMP_U_F64
18130
18131 // D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC encoding.
18132 void
18133 Inst_VOP3__V_CMP_U_F64::execute(GPUDynInstPtr gpuDynInst)
18134 {
18135 Wavefront *wf = gpuDynInst->wavefront();
18136 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18137 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18138 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18139
18140 src0.readSrc();
18141 src1.readSrc();
18142
18143 if (instData.ABS & 0x1) {
18144 src0.absModifier();
18145 }
18146
18147 if (instData.ABS & 0x2) {
18148 src1.absModifier();
18149 }
18150
18151 if (extData.NEG & 0x1) {
18152 src0.negModifier();
18153 }
18154
18155 if (extData.NEG & 0x2) {
18156 src1.negModifier();
18157 }
18158
18159 /**
18160 * input modifiers are supported by FP operations only
18161 */
18162 assert(!(instData.ABS & 0x4));
18163 assert(!(extData.NEG & 0x4));
18164
18165 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18166 if (wf->execMask(lane)) {
18167 sdst.setBit(lane, (std::isnan(src0[lane])
18168 || std::isnan(src1[lane])) ? 1 : 0);
18169 }
18170 }
18171
18172 sdst.write();
18173 }
18174
18175 Inst_VOP3__V_CMP_NGE_F64::Inst_VOP3__V_CMP_NGE_F64(
18176 InFmt_VOP3 *iFmt)
18177 : Inst_VOP3(iFmt, "v_cmp_nge_f64", true)
18178 {
18179 setFlag(ALU);
18180 setFlag(F64);
18181 } // Inst_VOP3__V_CMP_NGE_F64
18182
18183 Inst_VOP3__V_CMP_NGE_F64::~Inst_VOP3__V_CMP_NGE_F64()
18184 {
18185 } // ~Inst_VOP3__V_CMP_NGE_F64
18186
18187 // D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
18188 void
18189 Inst_VOP3__V_CMP_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
18190 {
18191 Wavefront *wf = gpuDynInst->wavefront();
18192 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18193 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18194 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18195
18196 src0.readSrc();
18197 src1.readSrc();
18198
18199 if (instData.ABS & 0x1) {
18200 src0.absModifier();
18201 }
18202
18203 if (instData.ABS & 0x2) {
18204 src1.absModifier();
18205 }
18206
18207 if (extData.NEG & 0x1) {
18208 src0.negModifier();
18209 }
18210
18211 if (extData.NEG & 0x2) {
18212 src1.negModifier();
18213 }
18214
18215 /**
18216 * input modifiers are supported by FP operations only
18217 */
18218 assert(!(instData.ABS & 0x4));
18219 assert(!(extData.NEG & 0x4));
18220
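            // The negated compares differ from their positive
            // counterparts only on NaNs: !(a >= b) is true when either
            // input is NaN, while the plain (a < b) would be false, so
            // NGE reads as "less than or unordered".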
18221 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18222 if (wf->execMask(lane)) {
18223 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
18224 }
18225 }
18226
18227 sdst.write();
18228 }
18229
18230 Inst_VOP3__V_CMP_NLG_F64::Inst_VOP3__V_CMP_NLG_F64(
18231 InFmt_VOP3 *iFmt)
18232 : Inst_VOP3(iFmt, "v_cmp_nlg_f64", true)
18233 {
18234 setFlag(ALU);
18235 setFlag(F64);
18236 } // Inst_VOP3__V_CMP_NLG_F64
18237
18238 Inst_VOP3__V_CMP_NLG_F64::~Inst_VOP3__V_CMP_NLG_F64()
18239 {
18240 } // ~Inst_VOP3__V_CMP_NLG_F64
18241
18242 // D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
18243 void
18244 Inst_VOP3__V_CMP_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
18245 {
18246 Wavefront *wf = gpuDynInst->wavefront();
18247 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18248 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18249 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18250
18251 src0.readSrc();
18252 src1.readSrc();
18253
18254 if (instData.ABS & 0x1) {
18255 src0.absModifier();
18256 }
18257
18258 if (instData.ABS & 0x2) {
18259 src1.absModifier();
18260 }
18261
18262 if (extData.NEG & 0x1) {
18263 src0.negModifier();
18264 }
18265
18266 if (extData.NEG & 0x2) {
18267 src1.negModifier();
18268 }
18269
18270 /**
18271 * input modifiers are supported by FP operations only
18272 */
18273 assert(!(instData.ABS & 0x4));
18274 assert(!(extData.NEG & 0x4));
18275
18276 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18277 if (wf->execMask(lane)) {
18278 sdst.setBit(lane, !(src0[lane] < src1[lane]
18279 || src0[lane] > src1[lane]) ? 1 : 0);
18280 }
18281 }
18282
18283 sdst.write();
18284 }
18285
18286 Inst_VOP3__V_CMP_NGT_F64::Inst_VOP3__V_CMP_NGT_F64(
18287 InFmt_VOP3 *iFmt)
18288 : Inst_VOP3(iFmt, "v_cmp_ngt_f64", true)
18289 {
18290 setFlag(ALU);
18291 setFlag(F64);
18292 } // Inst_VOP3__V_CMP_NGT_F64
18293
18294 Inst_VOP3__V_CMP_NGT_F64::~Inst_VOP3__V_CMP_NGT_F64()
18295 {
18296 } // ~Inst_VOP3__V_CMP_NGT_F64
18297
18298 // D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
18299 void
18300 Inst_VOP3__V_CMP_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
18301 {
18302 Wavefront *wf = gpuDynInst->wavefront();
18303 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18304 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18305 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18306
18307 src0.readSrc();
18308 src1.readSrc();
18309
18310 if (instData.ABS & 0x1) {
18311 src0.absModifier();
18312 }
18313
18314 if (instData.ABS & 0x2) {
18315 src1.absModifier();
18316 }
18317
18318 if (extData.NEG & 0x1) {
18319 src0.negModifier();
18320 }
18321
18322 if (extData.NEG & 0x2) {
18323 src1.negModifier();
18324 }
18325
18326 /**
18327 * input modifiers are supported by FP operations only
18328 */
18329 assert(!(instData.ABS & 0x4));
18330 assert(!(extData.NEG & 0x4));
18331
18332 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18333 if (wf->execMask(lane)) {
18334 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
18335 }
18336 }
18337
18338 sdst.write();
18339 }
18340
18341 Inst_VOP3__V_CMP_NLE_F64::Inst_VOP3__V_CMP_NLE_F64(
18342 InFmt_VOP3 *iFmt)
18343 : Inst_VOP3(iFmt, "v_cmp_nle_f64", true)
18344 {
18345 setFlag(ALU);
18346 setFlag(F64);
18347 } // Inst_VOP3__V_CMP_NLE_F64
18348
18349 Inst_VOP3__V_CMP_NLE_F64::~Inst_VOP3__V_CMP_NLE_F64()
18350 {
18351 } // ~Inst_VOP3__V_CMP_NLE_F64
18352
18353 // D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
18354 void
18355 Inst_VOP3__V_CMP_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
18356 {
18357 Wavefront *wf = gpuDynInst->wavefront();
18358 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18359 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18360 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18361
18362 src0.readSrc();
18363 src1.readSrc();
18364
18365 if (instData.ABS & 0x1) {
18366 src0.absModifier();
18367 }
18368
18369 if (instData.ABS & 0x2) {
18370 src1.absModifier();
18371 }
18372
18373 if (extData.NEG & 0x1) {
18374 src0.negModifier();
18375 }
18376
18377 if (extData.NEG & 0x2) {
18378 src1.negModifier();
18379 }
18380
18381 /**
18382 * input modifiers are supported by FP operations only
18383 */
18384 assert(!(instData.ABS & 0x4));
18385 assert(!(extData.NEG & 0x4));
18386
18387 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18388 if (wf->execMask(lane)) {
18389 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
18390 }
18391 }
18392
18393 sdst.write();
18394 }
18395
18396 Inst_VOP3__V_CMP_NEQ_F64::Inst_VOP3__V_CMP_NEQ_F64(
18397 InFmt_VOP3 *iFmt)
18398 : Inst_VOP3(iFmt, "v_cmp_neq_f64", true)
18399 {
18400 setFlag(ALU);
18401 setFlag(F64);
18402 } // Inst_VOP3__V_CMP_NEQ_F64
18403
18404 Inst_VOP3__V_CMP_NEQ_F64::~Inst_VOP3__V_CMP_NEQ_F64()
18405 {
18406 } // ~Inst_VOP3__V_CMP_NEQ_F64
18407
18408 // D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
18409 void
18410 Inst_VOP3__V_CMP_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
18411 {
18412 Wavefront *wf = gpuDynInst->wavefront();
18413 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18414 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18415 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18416
18417 src0.readSrc();
18418 src1.readSrc();
18419
18420 if (instData.ABS & 0x1) {
18421 src0.absModifier();
18422 }
18423
18424 if (instData.ABS & 0x2) {
18425 src1.absModifier();
18426 }
18427
18428 if (extData.NEG & 0x1) {
18429 src0.negModifier();
18430 }
18431
18432 if (extData.NEG & 0x2) {
18433 src1.negModifier();
18434 }
18435
18436 /**
18437 * input modifiers are supported by FP operations only
18438 */
18439 assert(!(instData.ABS & 0x4));
18440 assert(!(extData.NEG & 0x4));
18441
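            // IEEE != is "unordered or unequal", so it matches the
            // documented !(S0 == S1) bit for bit, NaN inputs included.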
18442 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18443 if (wf->execMask(lane)) {
18444 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
18445 }
18446 }
18447
18448 sdst.write();
18449 }
18450
18451 Inst_VOP3__V_CMP_NLT_F64::Inst_VOP3__V_CMP_NLT_F64(
18452 InFmt_VOP3 *iFmt)
18453 : Inst_VOP3(iFmt, "v_cmp_nlt_f64", true)
18454 {
18455 setFlag(ALU);
18456 setFlag(F64);
18457 } // Inst_VOP3__V_CMP_NLT_F64
18458
18459 Inst_VOP3__V_CMP_NLT_F64::~Inst_VOP3__V_CMP_NLT_F64()
18460 {
18461 } // ~Inst_VOP3__V_CMP_NLT_F64
18462
18463 // D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
18464 void
18465 Inst_VOP3__V_CMP_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
18466 {
18467 Wavefront *wf = gpuDynInst->wavefront();
18468 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18469 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18470 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18471
18472 src0.readSrc();
18473 src1.readSrc();
18474
18475 if (instData.ABS & 0x1) {
18476 src0.absModifier();
18477 }
18478
18479 if (instData.ABS & 0x2) {
18480 src1.absModifier();
18481 }
18482
18483 if (extData.NEG & 0x1) {
18484 src0.negModifier();
18485 }
18486
18487 if (extData.NEG & 0x2) {
18488 src1.negModifier();
18489 }
18490
18491 /**
18492 * input modifiers are supported by FP operations only
18493 */
18494 assert(!(instData.ABS & 0x4));
18495 assert(!(extData.NEG & 0x4));
18496
18497 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18498 if (wf->execMask(lane)) {
18499 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
18500 }
18501 }
18502
18503 sdst.write();
18504 }
18505
18506 Inst_VOP3__V_CMP_TRU_F64::Inst_VOP3__V_CMP_TRU_F64(
18507 InFmt_VOP3 *iFmt)
18508 : Inst_VOP3(iFmt, "v_cmp_tru_f64", true)
18509 {
18510 setFlag(ALU);
18511 setFlag(F64);
18512 } // Inst_VOP3__V_CMP_TRU_F64
18513
18514 Inst_VOP3__V_CMP_TRU_F64::~Inst_VOP3__V_CMP_TRU_F64()
18515 {
18516 } // ~Inst_VOP3__V_CMP_TRU_F64
18517
18518 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
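    // TRU is the trivial always-true predicate (the F forms are its
    // always-false complement): each active lane's bit is set to 1 and
    // the bits of inactive lanes are left untouched.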
18519 void
18520 Inst_VOP3__V_CMP_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
18521 {
18522 Wavefront *wf = gpuDynInst->wavefront();
18523 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18524
18525 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18526 if (wf->execMask(lane)) {
18527 sdst.setBit(lane, 1);
18528 }
18529 }
18530
18531 sdst.write();
18532 }
18533
18534 Inst_VOP3__V_CMPX_F_F64::Inst_VOP3__V_CMPX_F_F64(
18535 InFmt_VOP3 *iFmt)
18536 : Inst_VOP3(iFmt, "v_cmpx_f_f64", true)
18537 {
18538 setFlag(ALU);
18539 setFlag(F64);
18540 } // Inst_VOP3__V_CMPX_F_F64
18541
18542 Inst_VOP3__V_CMPX_F_F64::~Inst_VOP3__V_CMPX_F_F64()
18543 {
18544 } // ~Inst_VOP3__V_CMPX_F_F64
18545
18546 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
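    // The CMPX forms write the comparison mask to EXEC as well as to the
    // destination, so subsequent vector instructions execute only in the
    // lanes where the compare held.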
18547 void
18548 Inst_VOP3__V_CMPX_F_F64::execute(GPUDynInstPtr gpuDynInst)
18549 {
18550 Wavefront *wf = gpuDynInst->wavefront();
18551 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18552
18553 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18554 if (wf->execMask(lane)) {
18555 sdst.setBit(lane, 0);
18556 }
18557 }
18558
18559 wf->execMask() = sdst.rawData();
18560 sdst.write();
18561 }
18562
18563 Inst_VOP3__V_CMPX_LT_F64::Inst_VOP3__V_CMPX_LT_F64(
18564 InFmt_VOP3 *iFmt)
18565 : Inst_VOP3(iFmt, "v_cmpx_lt_f64", true)
18566 {
18567 setFlag(ALU);
18568 setFlag(F64);
18569 } // Inst_VOP3__V_CMPX_LT_F64
18570
18571 Inst_VOP3__V_CMPX_LT_F64::~Inst_VOP3__V_CMPX_LT_F64()
18572 {
18573 } // ~Inst_VOP3__V_CMPX_LT_F64
18574
18575 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
18576 void
18577 Inst_VOP3__V_CMPX_LT_F64::execute(GPUDynInstPtr gpuDynInst)
18578 {
18579 Wavefront *wf = gpuDynInst->wavefront();
18580 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18581 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18582 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18583
18584 src0.readSrc();
18585 src1.readSrc();
18586
18587 if (instData.ABS & 0x1) {
18588 src0.absModifier();
18589 }
18590
18591 if (instData.ABS & 0x2) {
18592 src1.absModifier();
18593 }
18594
18595 if (extData.NEG & 0x1) {
18596 src0.negModifier();
18597 }
18598
18599 if (extData.NEG & 0x2) {
18600 src1.negModifier();
18601 }
18602
18603 /**
18604 * input modifiers are supported by FP operations only
18605 */
18606 assert(!(instData.ABS & 0x4));
18607 assert(!(extData.NEG & 0x4));
18608
18609 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18610 if (wf->execMask(lane)) {
18611 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
18612 }
18613 }
18614
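            // Only active lanes' bits were set in the loop above; the
            // bits of lanes already masked off keep whatever the freshly
            // constructed operand held, which appears to assume the
            // operand's backing storage starts out zeroed.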
18615 wf->execMask() = sdst.rawData();
18616 sdst.write();
18617 }
18618
18619 Inst_VOP3__V_CMPX_EQ_F64::Inst_VOP3__V_CMPX_EQ_F64(
18620 InFmt_VOP3 *iFmt)
18621 : Inst_VOP3(iFmt, "v_cmpx_eq_f64", true)
18622 {
18623 setFlag(ALU);
18624 setFlag(F64);
18625 } // Inst_VOP3__V_CMPX_EQ_F64
18626
18627 Inst_VOP3__V_CMPX_EQ_F64::~Inst_VOP3__V_CMPX_EQ_F64()
18628 {
18629 } // ~Inst_VOP3__V_CMPX_EQ_F64
18630
18631 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
18632 void
18633 Inst_VOP3__V_CMPX_EQ_F64::execute(GPUDynInstPtr gpuDynInst)
18634 {
18635 Wavefront *wf = gpuDynInst->wavefront();
18636 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18637 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18638 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18639
18640 src0.readSrc();
18641 src1.readSrc();
18642
18643 if (instData.ABS & 0x1) {
18644 src0.absModifier();
18645 }
18646
18647 if (instData.ABS & 0x2) {
18648 src1.absModifier();
18649 }
18650
18651 if (extData.NEG & 0x1) {
18652 src0.negModifier();
18653 }
18654
18655 if (extData.NEG & 0x2) {
18656 src1.negModifier();
18657 }
18658
18659 /**
18660 * input modifiers are supported by FP operations only
18661 */
18662 assert(!(instData.ABS & 0x4));
18663 assert(!(extData.NEG & 0x4));
18664
18665 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18666 if (wf->execMask(lane)) {
18667 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
18668 }
18669 }
18670
18671 wf->execMask() = sdst.rawData();
18672 sdst.write();
18673 }
18674
18675 Inst_VOP3__V_CMPX_LE_F64::Inst_VOP3__V_CMPX_LE_F64(
18676 InFmt_VOP3 *iFmt)
18677 : Inst_VOP3(iFmt, "v_cmpx_le_f64", true)
18678 {
18679 setFlag(ALU);
18680 setFlag(F64);
18681 } // Inst_VOP3__V_CMPX_LE_F64
18682
18683 Inst_VOP3__V_CMPX_LE_F64::~Inst_VOP3__V_CMPX_LE_F64()
18684 {
18685 } // ~Inst_VOP3__V_CMPX_LE_F64
18686
18687 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
18688 void
18689 Inst_VOP3__V_CMPX_LE_F64::execute(GPUDynInstPtr gpuDynInst)
18690 {
18691 Wavefront *wf = gpuDynInst->wavefront();
18692 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18693 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18694 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18695
18696 src0.readSrc();
18697 src1.readSrc();
18698
18699 if (instData.ABS & 0x1) {
18700 src0.absModifier();
18701 }
18702
18703 if (instData.ABS & 0x2) {
18704 src1.absModifier();
18705 }
18706
18707 if (extData.NEG & 0x1) {
18708 src0.negModifier();
18709 }
18710
18711 if (extData.NEG & 0x2) {
18712 src1.negModifier();
18713 }
18714
18715 /**
18716 * input modifiers are supported by FP operations only
18717 */
18718 assert(!(instData.ABS & 0x4));
18719 assert(!(extData.NEG & 0x4));
18720
18721 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18722 if (wf->execMask(lane)) {
18723 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
18724 }
18725 }
18726
18727 wf->execMask() = sdst.rawData();
18728 sdst.write();
18729 }
18730
18731 Inst_VOP3__V_CMPX_GT_F64::Inst_VOP3__V_CMPX_GT_F64(
18732 InFmt_VOP3 *iFmt)
18733 : Inst_VOP3(iFmt, "v_cmpx_gt_f64", true)
18734 {
18735 setFlag(ALU);
18736 setFlag(F64);
18737 } // Inst_VOP3__V_CMPX_GT_F64
18738
18739 Inst_VOP3__V_CMPX_GT_F64::~Inst_VOP3__V_CMPX_GT_F64()
18740 {
18741 } // ~Inst_VOP3__V_CMPX_GT_F64
18742
18743 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
18744 void
18745 Inst_VOP3__V_CMPX_GT_F64::execute(GPUDynInstPtr gpuDynInst)
18746 {
18747 Wavefront *wf = gpuDynInst->wavefront();
18748 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18749 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18750 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18751
18752 src0.readSrc();
18753 src1.readSrc();
18754
18755 if (instData.ABS & 0x1) {
18756 src0.absModifier();
18757 }
18758
18759 if (instData.ABS & 0x2) {
18760 src1.absModifier();
18761 }
18762
18763 if (extData.NEG & 0x1) {
18764 src0.negModifier();
18765 }
18766
18767 if (extData.NEG & 0x2) {
18768 src1.negModifier();
18769 }
18770
18771 /**
18772 * input modifiers are supported by FP operations only
18773 */
18774 assert(!(instData.ABS & 0x4));
18775 assert(!(extData.NEG & 0x4));
18776
18777 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18778 if (wf->execMask(lane)) {
18779 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
18780 }
18781 }
18782
18783 wf->execMask() = sdst.rawData();
18784 sdst.write();
18785 }
18786
18787 Inst_VOP3__V_CMPX_LG_F64::Inst_VOP3__V_CMPX_LG_F64(
18788 InFmt_VOP3 *iFmt)
18789 : Inst_VOP3(iFmt, "v_cmpx_lg_f64", true)
18790 {
18791 setFlag(ALU);
18792 setFlag(F64);
18793 } // Inst_VOP3__V_CMPX_LG_F64
18794
18795 Inst_VOP3__V_CMPX_LG_F64::~Inst_VOP3__V_CMPX_LG_F64()
18796 {
18797 } // ~Inst_VOP3__V_CMPX_LG_F64
18798
18799 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
18800 void
18801 Inst_VOP3__V_CMPX_LG_F64::execute(GPUDynInstPtr gpuDynInst)
18802 {
18803 Wavefront *wf = gpuDynInst->wavefront();
18804 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18805 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18806 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18807
18808 src0.readSrc();
18809 src1.readSrc();
18810
18811 if (instData.ABS & 0x1) {
18812 src0.absModifier();
18813 }
18814
18815 if (instData.ABS & 0x2) {
18816 src1.absModifier();
18817 }
18818
18819 if (extData.NEG & 0x1) {
18820 src0.negModifier();
18821 }
18822
18823 if (extData.NEG & 0x2) {
18824 src1.negModifier();
18825 }
18826
18827 /**
18828 * input modifiers are supported by FP operations only
18829 */
18830 assert(!(instData.ABS & 0x4));
18831 assert(!(extData.NEG & 0x4));
18832
18833 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18834 if (wf->execMask(lane)) {
18835 sdst.setBit(lane, (src0[lane] < src1[lane]
18836 || src0[lane] > src1[lane]) ? 1 : 0);
18837 }
18838 }
18839
18840 wf->execMask() = sdst.rawData();
18841 sdst.write();
18842 }
18843
18844 Inst_VOP3__V_CMPX_GE_F64::Inst_VOP3__V_CMPX_GE_F64(
18845 InFmt_VOP3 *iFmt)
18846 : Inst_VOP3(iFmt, "v_cmpx_ge_f64", true)
18847 {
18848 setFlag(ALU);
18849 setFlag(F64);
18850 } // Inst_VOP3__V_CMPX_GE_F64
18851
18852 Inst_VOP3__V_CMPX_GE_F64::~Inst_VOP3__V_CMPX_GE_F64()
18853 {
18854 } // ~Inst_VOP3__V_CMPX_GE_F64
18855
18856 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
18857 void
18858 Inst_VOP3__V_CMPX_GE_F64::execute(GPUDynInstPtr gpuDynInst)
18859 {
18860 Wavefront *wf = gpuDynInst->wavefront();
18861 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18862 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18863 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18864
18865 src0.readSrc();
18866 src1.readSrc();
18867
18868 if (instData.ABS & 0x1) {
18869 src0.absModifier();
18870 }
18871
18872 if (instData.ABS & 0x2) {
18873 src1.absModifier();
18874 }
18875
18876 if (extData.NEG & 0x1) {
18877 src0.negModifier();
18878 }
18879
18880 if (extData.NEG & 0x2) {
18881 src1.negModifier();
18882 }
18883
18884 /**
18885 * input modifiers are supported by FP operations only
18886 */
18887 assert(!(instData.ABS & 0x4));
18888 assert(!(extData.NEG & 0x4));
18889
18890 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18891 if (wf->execMask(lane)) {
18892 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
18893 }
18894 }
18895
18896 wf->execMask() = sdst.rawData();
18897 sdst.write();
18898 }
18899
18900 Inst_VOP3__V_CMPX_O_F64::Inst_VOP3__V_CMPX_O_F64(
18901 InFmt_VOP3 *iFmt)
18902 : Inst_VOP3(iFmt, "v_cmpx_o_f64", true)
18903 {
18904 setFlag(ALU);
18905 setFlag(F64);
18906 } // Inst_VOP3__V_CMPX_O_F64
18907
18908 Inst_VOP3__V_CMPX_O_F64::~Inst_VOP3__V_CMPX_O_F64()
18909 {
18910 } // ~Inst_VOP3__V_CMPX_O_F64
18911
18912 // EXEC,D.u64[threadID] = (!isNan(S0) && !isNan(S1)); D = VCC in VOPC
18913 // encoding.
18914 void
18915 Inst_VOP3__V_CMPX_O_F64::execute(GPUDynInstPtr gpuDynInst)
18916 {
18917 Wavefront *wf = gpuDynInst->wavefront();
18918 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18919 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18920 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18921
18922 src0.readSrc();
18923 src1.readSrc();
18924
18925 if (instData.ABS & 0x1) {
18926 src0.absModifier();
18927 }
18928
18929 if (instData.ABS & 0x2) {
18930 src1.absModifier();
18931 }
18932
18933 if (extData.NEG & 0x1) {
18934 src0.negModifier();
18935 }
18936
18937 if (extData.NEG & 0x2) {
18938 src1.negModifier();
18939 }
18940
18941 /**
18942 * input modifiers are supported by FP operations only
18943 */
18944 assert(!(instData.ABS & 0x4));
18945 assert(!(extData.NEG & 0x4));
18946
18947 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
18948 if (wf->execMask(lane)) {
18949 sdst.setBit(lane, (!std::isnan(src0[lane])
18950 && !std::isnan(src1[lane])) ? 1 : 0);
18951 }
18952 }
18953
18954 wf->execMask() = sdst.rawData();
18955 sdst.write();
18956 }
18957
18958 Inst_VOP3__V_CMPX_U_F64::Inst_VOP3__V_CMPX_U_F64(
18959 InFmt_VOP3 *iFmt)
18960 : Inst_VOP3(iFmt, "v_cmpx_u_f64", true)
18961 {
18962 setFlag(ALU);
18963 setFlag(F64);
18964 } // Inst_VOP3__V_CMPX_U_F64
18965
18966 Inst_VOP3__V_CMPX_U_F64::~Inst_VOP3__V_CMPX_U_F64()
18967 {
18968 } // ~Inst_VOP3__V_CMPX_U_F64
18969
18970 // EXEC,D.u64[threadID] = (isNan(S0) || isNan(S1)); D = VCC in VOPC
18971 // encoding.
18972 void
18973 Inst_VOP3__V_CMPX_U_F64::execute(GPUDynInstPtr gpuDynInst)
18974 {
18975 Wavefront *wf = gpuDynInst->wavefront();
18976 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
18977 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
18978 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
18979
18980 src0.readSrc();
18981 src1.readSrc();
18982
18983 if (instData.ABS & 0x1) {
18984 src0.absModifier();
18985 }
18986
18987 if (instData.ABS & 0x2) {
18988 src1.absModifier();
18989 }
18990
18991 if (extData.NEG & 0x1) {
18992 src0.negModifier();
18993 }
18994
18995 if (extData.NEG & 0x2) {
18996 src1.negModifier();
18997 }
18998
18999 /**
19000 * input modifiers are supported by FP operations only
19001 */
19002 assert(!(instData.ABS & 0x4));
19003 assert(!(extData.NEG & 0x4));
19004
19005 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19006 if (wf->execMask(lane)) {
19007 sdst.setBit(lane, (std::isnan(src0[lane])
19008 || std::isnan(src1[lane])) ? 1 : 0);
19009 }
19010 }
19011
19012 wf->execMask() = sdst.rawData();
19013 sdst.write();
19014 }
19015
19016 Inst_VOP3__V_CMPX_NGE_F64::Inst_VOP3__V_CMPX_NGE_F64(
19017 InFmt_VOP3 *iFmt)
19018 : Inst_VOP3(iFmt, "v_cmpx_nge_f64", true)
19019 {
19020 setFlag(ALU);
19021 setFlag(F64);
19022 } // Inst_VOP3__V_CMPX_NGE_F64
19023
19024 Inst_VOP3__V_CMPX_NGE_F64::~Inst_VOP3__V_CMPX_NGE_F64()
19025 {
19026 } // ~Inst_VOP3__V_CMPX_NGE_F64
19027
19028 // EXEC,D.u64[threadID] = !(S0 >= S1); D = VCC in VOPC encoding.
19029 void
19030 Inst_VOP3__V_CMPX_NGE_F64::execute(GPUDynInstPtr gpuDynInst)
19031 {
19032 Wavefront *wf = gpuDynInst->wavefront();
19033 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19034 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19035 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19036
19037 src0.readSrc();
19038 src1.readSrc();
19039
19040 if (instData.ABS & 0x1) {
19041 src0.absModifier();
19042 }
19043
19044 if (instData.ABS & 0x2) {
19045 src1.absModifier();
19046 }
19047
19048 if (extData.NEG & 0x1) {
19049 src0.negModifier();
19050 }
19051
19052 if (extData.NEG & 0x2) {
19053 src1.negModifier();
19054 }
19055
19056 /**
19057 * input modifiers are supported by FP operations only
19058 */
19059 assert(!(instData.ABS & 0x4));
19060 assert(!(extData.NEG & 0x4));
19061
19062 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19063 if (wf->execMask(lane)) {
19064 sdst.setBit(lane, !(src0[lane] >= src1[lane]) ? 1 : 0);
19065 }
19066 }
19067
19068 wf->execMask() = sdst.rawData();
19069 sdst.write();
19070 }
19071
19072 Inst_VOP3__V_CMPX_NLG_F64::Inst_VOP3__V_CMPX_NLG_F64(
19073 InFmt_VOP3 *iFmt)
19074 : Inst_VOP3(iFmt, "v_cmpx_nlg_f64", true)
19075 {
19076 setFlag(ALU);
19077 setFlag(F64);
19078 } // Inst_VOP3__V_CMPX_NLG_F64
19079
19080 Inst_VOP3__V_CMPX_NLG_F64::~Inst_VOP3__V_CMPX_NLG_F64()
19081 {
19082 } // ~Inst_VOP3__V_CMPX_NLG_F64
19083
19084 // EXEC,D.u64[threadID] = !(S0 <> S1); D = VCC in VOPC encoding.
19085 void
19086 Inst_VOP3__V_CMPX_NLG_F64::execute(GPUDynInstPtr gpuDynInst)
19087 {
19088 Wavefront *wf = gpuDynInst->wavefront();
19089 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19090 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19091 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19092
19093 src0.readSrc();
19094 src1.readSrc();
19095
19096 if (instData.ABS & 0x1) {
19097 src0.absModifier();
19098 }
19099
19100 if (instData.ABS & 0x2) {
19101 src1.absModifier();
19102 }
19103
19104 if (extData.NEG & 0x1) {
19105 src0.negModifier();
19106 }
19107
19108 if (extData.NEG & 0x2) {
19109 src1.negModifier();
19110 }
19111
19112 /**
19113 * input modifiers are supported by FP operations only
19114 */
19115 assert(!(instData.ABS & 0x4));
19116 assert(!(extData.NEG & 0x4));
19117
19118 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19119 if (wf->execMask(lane)) {
19120 sdst.setBit(lane, !(src0[lane] < src1[lane]
19121 || src0[lane] > src1[lane]) ? 1 : 0);
19122 }
19123 }
19124
19125 wf->execMask() = sdst.rawData();
19126 sdst.write();
19127 }
19128
19129 Inst_VOP3__V_CMPX_NGT_F64::Inst_VOP3__V_CMPX_NGT_F64(
19130 InFmt_VOP3 *iFmt)
19131 : Inst_VOP3(iFmt, "v_cmpx_ngt_f64", true)
19132 {
19133 setFlag(ALU);
19134 setFlag(F64);
19135 } // Inst_VOP3__V_CMPX_NGT_F64
19136
19137 Inst_VOP3__V_CMPX_NGT_F64::~Inst_VOP3__V_CMPX_NGT_F64()
19138 {
19139 } // ~Inst_VOP3__V_CMPX_NGT_F64
19140
19141 // EXEC,D.u64[threadID] = !(S0 > S1); D = VCC in VOPC encoding.
19142 void
19143 Inst_VOP3__V_CMPX_NGT_F64::execute(GPUDynInstPtr gpuDynInst)
19144 {
19145 Wavefront *wf = gpuDynInst->wavefront();
19146 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19147 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19148 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19149
19150 src0.readSrc();
19151 src1.readSrc();
19152
19153 if (instData.ABS & 0x1) {
19154 src0.absModifier();
19155 }
19156
19157 if (instData.ABS & 0x2) {
19158 src1.absModifier();
19159 }
19160
19161 if (extData.NEG & 0x1) {
19162 src0.negModifier();
19163 }
19164
19165 if (extData.NEG & 0x2) {
19166 src1.negModifier();
19167 }
19168
19169 /**
19170 * input modifiers are supported by FP operations only
19171 */
19172 assert(!(instData.ABS & 0x4));
19173 assert(!(extData.NEG & 0x4));
19174
19175 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19176 if (wf->execMask(lane)) {
19177 sdst.setBit(lane, !(src0[lane] > src1[lane]) ? 1 : 0);
19178 }
19179 }
19180
19181 wf->execMask() = sdst.rawData();
19182 sdst.write();
19183 }
19184
19185 Inst_VOP3__V_CMPX_NLE_F64::Inst_VOP3__V_CMPX_NLE_F64(
19186 InFmt_VOP3 *iFmt)
19187 : Inst_VOP3(iFmt, "v_cmpx_nle_f64", true)
19188 {
19189 setFlag(ALU);
19190 setFlag(F64);
19191 } // Inst_VOP3__V_CMPX_NLE_F64
19192
19193 Inst_VOP3__V_CMPX_NLE_F64::~Inst_VOP3__V_CMPX_NLE_F64()
19194 {
19195 } // ~Inst_VOP3__V_CMPX_NLE_F64
19196
19197 // EXEC,D.u64[threadID] = !(S0 <= S1); D = VCC in VOPC encoding.
19198 void
19199 Inst_VOP3__V_CMPX_NLE_F64::execute(GPUDynInstPtr gpuDynInst)
19200 {
19201 Wavefront *wf = gpuDynInst->wavefront();
19202 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19203 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19204 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19205
19206 src0.readSrc();
19207 src1.readSrc();
19208
19209 if (instData.ABS & 0x1) {
19210 src0.absModifier();
19211 }
19212
19213 if (instData.ABS & 0x2) {
19214 src1.absModifier();
19215 }
19216
19217 if (extData.NEG & 0x1) {
19218 src0.negModifier();
19219 }
19220
19221 if (extData.NEG & 0x2) {
19222 src1.negModifier();
19223 }
19224
19225 /**
19226 * input modifiers are supported by FP operations only
19227 */
19228 assert(!(instData.ABS & 0x4));
19229 assert(!(extData.NEG & 0x4));
19230
19231 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19232 if (wf->execMask(lane)) {
19233 sdst.setBit(lane, !(src0[lane] <= src1[lane]) ? 1 : 0);
19234 }
19235 }
19236
19237 wf->execMask() = sdst.rawData();
19238 sdst.write();
19239 }
19240
19241 Inst_VOP3__V_CMPX_NEQ_F64::Inst_VOP3__V_CMPX_NEQ_F64(
19242 InFmt_VOP3 *iFmt)
19243 : Inst_VOP3(iFmt, "v_cmpx_neq_f64", true)
19244 {
19245 setFlag(ALU);
19246 setFlag(F64);
19247 } // Inst_VOP3__V_CMPX_NEQ_F64
19248
19249 Inst_VOP3__V_CMPX_NEQ_F64::~Inst_VOP3__V_CMPX_NEQ_F64()
19250 {
19251 } // ~Inst_VOP3__V_CMPX_NEQ_F64
19252
19253 // EXEC,D.u64[threadID] = !(S0 == S1); D = VCC in VOPC encoding.
19254 void
19255 Inst_VOP3__V_CMPX_NEQ_F64::execute(GPUDynInstPtr gpuDynInst)
19256 {
19257 Wavefront *wf = gpuDynInst->wavefront();
19258 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19259 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19260 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19261
19262 src0.readSrc();
19263 src1.readSrc();
19264
19265 if (instData.ABS & 0x1) {
19266 src0.absModifier();
19267 }
19268
19269 if (instData.ABS & 0x2) {
19270 src1.absModifier();
19271 }
19272
19273 if (extData.NEG & 0x1) {
19274 src0.negModifier();
19275 }
19276
19277 if (extData.NEG & 0x2) {
19278 src1.negModifier();
19279 }
19280
19281 /**
19282 * input modifiers are supported by FP operations only
19283 */
19284 assert(!(instData.ABS & 0x4));
19285 assert(!(extData.NEG & 0x4));
19286
19287 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19288 if (wf->execMask(lane)) {
19289 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
19290 }
19291 }
19292
19293 wf->execMask() = sdst.rawData();
19294 sdst.write();
19295 }
19296
19297 Inst_VOP3__V_CMPX_NLT_F64::Inst_VOP3__V_CMPX_NLT_F64(
19298 InFmt_VOP3 *iFmt)
19299 : Inst_VOP3(iFmt, "v_cmpx_nlt_f64", true)
19300 {
19301 setFlag(ALU);
19302 setFlag(F64);
19303 } // Inst_VOP3__V_CMPX_NLT_F64
19304
19305 Inst_VOP3__V_CMPX_NLT_F64::~Inst_VOP3__V_CMPX_NLT_F64()
19306 {
19307 } // ~Inst_VOP3__V_CMPX_NLT_F64
19308
19309 // EXEC,D.u64[threadID] = !(S0 < S1); D = VCC in VOPC encoding.
19310 void
19311 Inst_VOP3__V_CMPX_NLT_F64::execute(GPUDynInstPtr gpuDynInst)
19312 {
19313 Wavefront *wf = gpuDynInst->wavefront();
19314 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
19315 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
19316 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19317
19318 src0.readSrc();
19319 src1.readSrc();
19320
19321 if (instData.ABS & 0x1) {
19322 src0.absModifier();
19323 }
19324
19325 if (instData.ABS & 0x2) {
19326 src1.absModifier();
19327 }
19328
19329 if (extData.NEG & 0x1) {
19330 src0.negModifier();
19331 }
19332
19333 if (extData.NEG & 0x2) {
19334 src1.negModifier();
19335 }
19336
19337 /**
19338 * input modifiers are supported by FP operations only
19339 */
19340 assert(!(instData.ABS & 0x4));
19341 assert(!(extData.NEG & 0x4));
19342
19343 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19344 if (wf->execMask(lane)) {
19345 sdst.setBit(lane, !(src0[lane] < src1[lane]) ? 1 : 0);
19346 }
19347 }
19348
19349 wf->execMask() = sdst.rawData();
19350 sdst.write();
19351 }
19352
19353 Inst_VOP3__V_CMPX_TRU_F64::Inst_VOP3__V_CMPX_TRU_F64(
19354 InFmt_VOP3 *iFmt)
19355 : Inst_VOP3(iFmt, "v_cmpx_tru_f64", true)
19356 {
19357 setFlag(ALU);
19358 setFlag(F64);
19359 } // Inst_VOP3__V_CMPX_TRU_F64
19360
19361 Inst_VOP3__V_CMPX_TRU_F64::~Inst_VOP3__V_CMPX_TRU_F64()
19362 {
19363 } // ~Inst_VOP3__V_CMPX_TRU_F64
19364
19365 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
19366 void
19367 Inst_VOP3__V_CMPX_TRU_F64::execute(GPUDynInstPtr gpuDynInst)
19368 {
19369 Wavefront *wf = gpuDynInst->wavefront();
19370 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19371
19372 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19373 if (wf->execMask(lane)) {
19374 sdst.setBit(lane, 1);
19375 }
19376 }
19377
19378 wf->execMask() = sdst.rawData();
19379 sdst.write();
19380 }
19381
19382 Inst_VOP3__V_CMP_F_I16::Inst_VOP3__V_CMP_F_I16(InFmt_VOP3 *iFmt)
19383 : Inst_VOP3(iFmt, "v_cmp_f_i16", true)
19384 {
19385 setFlag(ALU);
19386 } // Inst_VOP3__V_CMP_F_I16
19387
19388 Inst_VOP3__V_CMP_F_I16::~Inst_VOP3__V_CMP_F_I16()
19389 {
19390 } // ~Inst_VOP3__V_CMP_F_I16
19391
19392 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
19393 void
19394 Inst_VOP3__V_CMP_F_I16::execute(GPUDynInstPtr gpuDynInst)
19395 {
19396 Wavefront *wf = gpuDynInst->wavefront();
19397 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19398
19399 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19400 if (wf->execMask(lane)) {
19401 sdst.setBit(lane, 0);
19402 }
19403 }
19404
19405 sdst.write();
19406 }
19407
19408 Inst_VOP3__V_CMP_LT_I16::Inst_VOP3__V_CMP_LT_I16(
19409 InFmt_VOP3 *iFmt)
19410 : Inst_VOP3(iFmt, "v_cmp_lt_i16", true)
19411 {
19412 setFlag(ALU);
19413 } // Inst_VOP3__V_CMP_LT_I16
19414
19415 Inst_VOP3__V_CMP_LT_I16::~Inst_VOP3__V_CMP_LT_I16()
19416 {
19417 } // ~Inst_VOP3__V_CMP_LT_I16
19418
19419 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
19420 void
19421 Inst_VOP3__V_CMP_LT_I16::execute(GPUDynInstPtr gpuDynInst)
19422 {
19423 Wavefront *wf = gpuDynInst->wavefront();
19424 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19425 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19426 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19427
19428 src0.readSrc();
19429 src1.readSrc();
19430
19431 /**
19432 * input modifiers are supported by FP operations only
19433 */
19434 assert(!(instData.ABS & 0x1));
19435 assert(!(instData.ABS & 0x2));
19436 assert(!(instData.ABS & 0x4));
19437 assert(!(extData.NEG & 0x1));
19438 assert(!(extData.NEG & 0x2));
19439 assert(!(extData.NEG & 0x4));
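            // Unlike the FP compares above, the integer forms accept no
            // input modifiers at all, so every ABS and NEG bit is
            // asserted clear rather than only the third-source bit.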
19440
19441 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19442 if (wf->execMask(lane)) {
19443 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
19444 }
19445 }
19446
19447 sdst.write();
19448 }
19449
19450 Inst_VOP3__V_CMP_EQ_I16::Inst_VOP3__V_CMP_EQ_I16(
19451 InFmt_VOP3 *iFmt)
19452 : Inst_VOP3(iFmt, "v_cmp_eq_i16", true)
19453 {
19454 setFlag(ALU);
19455 } // Inst_VOP3__V_CMP_EQ_I16
19456
19457 Inst_VOP3__V_CMP_EQ_I16::~Inst_VOP3__V_CMP_EQ_I16()
19458 {
19459 } // ~Inst_VOP3__V_CMP_EQ_I16
19460
19461 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
19462 void
19463 Inst_VOP3__V_CMP_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
19464 {
19465 Wavefront *wf = gpuDynInst->wavefront();
19466 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19467 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19468 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19469
19470 src0.readSrc();
19471 src1.readSrc();
19472
19473 /**
19474 * input modifiers are supported by FP operations only
19475 */
19476 assert(!(instData.ABS & 0x1));
19477 assert(!(instData.ABS & 0x2));
19478 assert(!(instData.ABS & 0x4));
19479 assert(!(extData.NEG & 0x1));
19480 assert(!(extData.NEG & 0x2));
19481 assert(!(extData.NEG & 0x4));
19482
19483 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19484 if (wf->execMask(lane)) {
19485 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
19486 }
19487 }
19488
19489 sdst.write();
19490 }
19491
19492 Inst_VOP3__V_CMP_LE_I16::Inst_VOP3__V_CMP_LE_I16(
19493 InFmt_VOP3 *iFmt)
19494 : Inst_VOP3(iFmt, "v_cmp_le_i16", true)
19495 {
19496 setFlag(ALU);
19497 } // Inst_VOP3__V_CMP_LE_I16
19498
19499 Inst_VOP3__V_CMP_LE_I16::~Inst_VOP3__V_CMP_LE_I16()
19500 {
19501 } // ~Inst_VOP3__V_CMP_LE_I16
19502
19503 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
19504 void
19505 Inst_VOP3__V_CMP_LE_I16::execute(GPUDynInstPtr gpuDynInst)
19506 {
19507 Wavefront *wf = gpuDynInst->wavefront();
19508 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19509 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19510 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19511
19512 src0.readSrc();
19513 src1.readSrc();
19514
19515 /**
19516 * input modifiers are supported by FP operations only
19517 */
19518 assert(!(instData.ABS & 0x1));
19519 assert(!(instData.ABS & 0x2));
19520 assert(!(instData.ABS & 0x4));
19521 assert(!(extData.NEG & 0x1));
19522 assert(!(extData.NEG & 0x2));
19523 assert(!(extData.NEG & 0x4));
19524
19525 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19526 if (wf->execMask(lane)) {
19527 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
19528 }
19529 }
19530
19531 sdst.write();
19532 }
19533
19534 Inst_VOP3__V_CMP_GT_I16::Inst_VOP3__V_CMP_GT_I16(
19535 InFmt_VOP3 *iFmt)
19536 : Inst_VOP3(iFmt, "v_cmp_gt_i16", true)
19537 {
19538 setFlag(ALU);
19539 } // Inst_VOP3__V_CMP_GT_I16
19540
19541 Inst_VOP3__V_CMP_GT_I16::~Inst_VOP3__V_CMP_GT_I16()
19542 {
19543 } // ~Inst_VOP3__V_CMP_GT_I16
19544
19545 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
19546 void
19547 Inst_VOP3__V_CMP_GT_I16::execute(GPUDynInstPtr gpuDynInst)
19548 {
19549 Wavefront *wf = gpuDynInst->wavefront();
19550 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19551 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19552 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19553
19554 src0.readSrc();
19555 src1.readSrc();
19556
19557 /**
19558 * input modifiers are supported by FP operations only
19559 */
19560 assert(!(instData.ABS & 0x1));
19561 assert(!(instData.ABS & 0x2));
19562 assert(!(instData.ABS & 0x4));
19563 assert(!(extData.NEG & 0x1));
19564 assert(!(extData.NEG & 0x2));
19565 assert(!(extData.NEG & 0x4));
19566
19567 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19568 if (wf->execMask(lane)) {
19569 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
19570 }
19571 }
19572
19573 sdst.write();
19574 }
19575
19576 Inst_VOP3__V_CMP_NE_I16::Inst_VOP3__V_CMP_NE_I16(
19577 InFmt_VOP3 *iFmt)
19578 : Inst_VOP3(iFmt, "v_cmp_ne_i16", true)
19579 {
19580 setFlag(ALU);
19581 } // Inst_VOP3__V_CMP_NE_I16
19582
19583 Inst_VOP3__V_CMP_NE_I16::~Inst_VOP3__V_CMP_NE_I16()
19584 {
19585 } // ~Inst_VOP3__V_CMP_NE_I16
19586
19587 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
19588 void
19589 Inst_VOP3__V_CMP_NE_I16::execute(GPUDynInstPtr gpuDynInst)
19590 {
19591 Wavefront *wf = gpuDynInst->wavefront();
19592 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19593 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19594 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19595
19596 src0.readSrc();
19597 src1.readSrc();
19598
19599 /**
19600 * input modifiers are supported by FP operations only
19601 */
19602 assert(!(instData.ABS & 0x1));
19603 assert(!(instData.ABS & 0x2));
19604 assert(!(instData.ABS & 0x4));
19605 assert(!(extData.NEG & 0x1));
19606 assert(!(extData.NEG & 0x2));
19607 assert(!(extData.NEG & 0x4));
19608
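            // Integers have no unordered case, so the documented
            // (S0 <> S1) and the != used here are the same predicate.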
19609 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19610 if (wf->execMask(lane)) {
19611 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
19612 }
19613 }
19614
19615 sdst.write();
19616 }
19617
19618 Inst_VOP3__V_CMP_GE_I16::Inst_VOP3__V_CMP_GE_I16(
19619 InFmt_VOP3 *iFmt)
19620 : Inst_VOP3(iFmt, "v_cmp_ge_i16", true)
19621 {
19622 setFlag(ALU);
19623 } // Inst_VOP3__V_CMP_GE_I16
19624
19625 Inst_VOP3__V_CMP_GE_I16::~Inst_VOP3__V_CMP_GE_I16()
19626 {
19627 } // ~Inst_VOP3__V_CMP_GE_I16
19628
19629 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
19630 void
19631 Inst_VOP3__V_CMP_GE_I16::execute(GPUDynInstPtr gpuDynInst)
19632 {
19633 Wavefront *wf = gpuDynInst->wavefront();
19634 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
19635 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
19636 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19637
19638 src0.readSrc();
19639 src1.readSrc();
19640
19641 /**
19642 * input modifiers are supported by FP operations only
19643 */
19644 assert(!(instData.ABS & 0x1));
19645 assert(!(instData.ABS & 0x2));
19646 assert(!(instData.ABS & 0x4));
19647 assert(!(extData.NEG & 0x1));
19648 assert(!(extData.NEG & 0x2));
19649 assert(!(extData.NEG & 0x4));
19650
19651 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19652 if (wf->execMask(lane)) {
19653 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
19654 }
19655 }
19656
19657 sdst.write();
19658 }
19659
19660 Inst_VOP3__V_CMP_T_I16::Inst_VOP3__V_CMP_T_I16(InFmt_VOP3 *iFmt)
19661 : Inst_VOP3(iFmt, "v_cmp_t_i16", true)
19662 {
19663 setFlag(ALU);
19664 } // Inst_VOP3__V_CMP_T_I16
19665
19666 Inst_VOP3__V_CMP_T_I16::~Inst_VOP3__V_CMP_T_I16()
19667 {
19668 } // ~Inst_VOP3__V_CMP_T_I16
19669
19670 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
19671 void
19672 Inst_VOP3__V_CMP_T_I16::execute(GPUDynInstPtr gpuDynInst)
19673 {
19674 Wavefront *wf = gpuDynInst->wavefront();
19675 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19676
19677 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19678 if (wf->execMask(lane)) {
19679 sdst.setBit(lane, 1);
19680 }
19681 }
19682
19683 sdst.write();
19684 }
19685
19686 Inst_VOP3__V_CMP_F_U16::Inst_VOP3__V_CMP_F_U16(InFmt_VOP3 *iFmt)
19687 : Inst_VOP3(iFmt, "v_cmp_f_u16", true)
19688 {
19689 setFlag(ALU);
19690 } // Inst_VOP3__V_CMP_F_U16
19691
19692 Inst_VOP3__V_CMP_F_U16::~Inst_VOP3__V_CMP_F_U16()
19693 {
19694 } // ~Inst_VOP3__V_CMP_F_U16
19695
19696 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
19697 void
19698 Inst_VOP3__V_CMP_F_U16::execute(GPUDynInstPtr gpuDynInst)
19699 {
19700 Wavefront *wf = gpuDynInst->wavefront();
19701 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19702
19703 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19704 if (wf->execMask(lane)) {
19705 sdst.setBit(lane, 0);
19706 }
19707 }
19708
19709 sdst.write();
19710 }
19711
19712 Inst_VOP3__V_CMP_LT_U16::Inst_VOP3__V_CMP_LT_U16(
19713 InFmt_VOP3 *iFmt)
19714 : Inst_VOP3(iFmt, "v_cmp_lt_u16", true)
19715 {
19716 setFlag(ALU);
19717 } // Inst_VOP3__V_CMP_LT_U16
19718
19719 Inst_VOP3__V_CMP_LT_U16::~Inst_VOP3__V_CMP_LT_U16()
19720 {
19721 } // ~Inst_VOP3__V_CMP_LT_U16
19722
19723 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
19724 void
19725 Inst_VOP3__V_CMP_LT_U16::execute(GPUDynInstPtr gpuDynInst)
19726 {
19727 Wavefront *wf = gpuDynInst->wavefront();
19728 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19729 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19730 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19731
19732 src0.readSrc();
19733 src1.readSrc();
19734
19735 /**
19736 * input modifiers are supported by FP operations only
19737 */
19738 assert(!(instData.ABS & 0x1));
19739 assert(!(instData.ABS & 0x2));
19740 assert(!(instData.ABS & 0x4));
19741 assert(!(extData.NEG & 0x1));
19742 assert(!(extData.NEG & 0x2));
19743 assert(!(extData.NEG & 0x4));
19744
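            // This matches v_cmp_lt_i16 except for the operand type:
            // ConstVecOperandU16 makes the < below an unsigned
            // comparison.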
19745 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19746 if (wf->execMask(lane)) {
19747 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
19748 }
19749 }
19750
19751 sdst.write();
19752 }
19753
19754 Inst_VOP3__V_CMP_EQ_U16::Inst_VOP3__V_CMP_EQ_U16(
19755 InFmt_VOP3 *iFmt)
19756 : Inst_VOP3(iFmt, "v_cmp_eq_u16", true)
19757 {
19758 setFlag(ALU);
19759 } // Inst_VOP3__V_CMP_EQ_U16
19760
19761 Inst_VOP3__V_CMP_EQ_U16::~Inst_VOP3__V_CMP_EQ_U16()
19762 {
19763 } // ~Inst_VOP3__V_CMP_EQ_U16
19764
19765 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
19766 void
19767 Inst_VOP3__V_CMP_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
19768 {
19769 Wavefront *wf = gpuDynInst->wavefront();
19770 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19771 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19772 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19773
19774 src0.readSrc();
19775 src1.readSrc();
19776
19777 /**
19778 * input modifiers are supported by FP operations only
19779 */
19780 assert(!(instData.ABS & 0x1));
19781 assert(!(instData.ABS & 0x2));
19782 assert(!(instData.ABS & 0x4));
19783 assert(!(extData.NEG & 0x1));
19784 assert(!(extData.NEG & 0x2));
19785 assert(!(extData.NEG & 0x4));
19786
19787 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19788 if (wf->execMask(lane)) {
19789 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
19790 }
19791 }
19792
19793 sdst.write();
19794 }
19795
19796 Inst_VOP3__V_CMP_LE_U16::Inst_VOP3__V_CMP_LE_U16(
19797 InFmt_VOP3 *iFmt)
19798 : Inst_VOP3(iFmt, "v_cmp_le_u16", true)
19799 {
19800 setFlag(ALU);
19801 } // Inst_VOP3__V_CMP_LE_U16
19802
19803 Inst_VOP3__V_CMP_LE_U16::~Inst_VOP3__V_CMP_LE_U16()
19804 {
19805 } // ~Inst_VOP3__V_CMP_LE_U16
19806
19807 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
19808 void
19809 Inst_VOP3__V_CMP_LE_U16::execute(GPUDynInstPtr gpuDynInst)
19810 {
19811 Wavefront *wf = gpuDynInst->wavefront();
19812 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19813 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19814 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19815
19816 src0.readSrc();
19817 src1.readSrc();
19818
19819 /**
19820 * input modifiers are supported by FP operations only
19821 */
19822 assert(!(instData.ABS & 0x1));
19823 assert(!(instData.ABS & 0x2));
19824 assert(!(instData.ABS & 0x4));
19825 assert(!(extData.NEG & 0x1));
19826 assert(!(extData.NEG & 0x2));
19827 assert(!(extData.NEG & 0x4));
19828
19829 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19830 if (wf->execMask(lane)) {
19831 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
19832 }
19833 }
19834
19835 sdst.write();
19836 }
19837
19838 Inst_VOP3__V_CMP_GT_U16::Inst_VOP3__V_CMP_GT_U16(
19839 InFmt_VOP3 *iFmt)
19840 : Inst_VOP3(iFmt, "v_cmp_gt_u16", true)
19841 {
19842 setFlag(ALU);
19843 } // Inst_VOP3__V_CMP_GT_U16
19844
19845 Inst_VOP3__V_CMP_GT_U16::~Inst_VOP3__V_CMP_GT_U16()
19846 {
19847 } // ~Inst_VOP3__V_CMP_GT_U16
19848
19849 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
19850 void
19851 Inst_VOP3__V_CMP_GT_U16::execute(GPUDynInstPtr gpuDynInst)
19852 {
19853 Wavefront *wf = gpuDynInst->wavefront();
19854 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19855 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19856 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19857
19858 src0.readSrc();
19859 src1.readSrc();
19860
19861 /**
19862 * input modifiers are supported by FP operations only
19863 */
19864 assert(!(instData.ABS & 0x1));
19865 assert(!(instData.ABS & 0x2));
19866 assert(!(instData.ABS & 0x4));
19867 assert(!(extData.NEG & 0x1));
19868 assert(!(extData.NEG & 0x2));
19869 assert(!(extData.NEG & 0x4));
19870
19871 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19872 if (wf->execMask(lane)) {
19873 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
19874 }
19875 }
19876
19877 sdst.write();
19878 }
19879
19880 Inst_VOP3__V_CMP_NE_U16::Inst_VOP3__V_CMP_NE_U16(
19881 InFmt_VOP3 *iFmt)
19882 : Inst_VOP3(iFmt, "v_cmp_ne_u16", true)
19883 {
19884 setFlag(ALU);
19885 } // Inst_VOP3__V_CMP_NE_U16
19886
19887 Inst_VOP3__V_CMP_NE_U16::~Inst_VOP3__V_CMP_NE_U16()
19888 {
19889 } // ~Inst_VOP3__V_CMP_NE_U16
19890
19891 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
19892 void
19893 Inst_VOP3__V_CMP_NE_U16::execute(GPUDynInstPtr gpuDynInst)
19894 {
19895 Wavefront *wf = gpuDynInst->wavefront();
19896 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19897 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19898 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19899
19900 src0.readSrc();
19901 src1.readSrc();
19902
19903 /**
19904 * input modifiers are supported by FP operations only
19905 */
19906 assert(!(instData.ABS & 0x1));
19907 assert(!(instData.ABS & 0x2));
19908 assert(!(instData.ABS & 0x4));
19909 assert(!(extData.NEG & 0x1));
19910 assert(!(extData.NEG & 0x2));
19911 assert(!(extData.NEG & 0x4));
19912
19913 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19914 if (wf->execMask(lane)) {
19915 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
19916 }
19917 }
19918
19919 sdst.write();
19920 }
19921
19922 Inst_VOP3__V_CMP_GE_U16::Inst_VOP3__V_CMP_GE_U16(
19923 InFmt_VOP3 *iFmt)
19924 : Inst_VOP3(iFmt, "v_cmp_ge_u16", true)
19925 {
19926 setFlag(ALU);
19927 } // Inst_VOP3__V_CMP_GE_U16
19928
19929 Inst_VOP3__V_CMP_GE_U16::~Inst_VOP3__V_CMP_GE_U16()
19930 {
19931 } // ~Inst_VOP3__V_CMP_GE_U16
19932
19933 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
19934 void
19935 Inst_VOP3__V_CMP_GE_U16::execute(GPUDynInstPtr gpuDynInst)
19936 {
19937 Wavefront *wf = gpuDynInst->wavefront();
19938 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
19939 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
19940 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19941
19942 src0.readSrc();
19943 src1.readSrc();
19944
19945 /**
19946 * input modifiers are supported by FP operations only
19947 */
19948 assert(!(instData.ABS & 0x1));
19949 assert(!(instData.ABS & 0x2));
19950 assert(!(instData.ABS & 0x4));
19951 assert(!(extData.NEG & 0x1));
19952 assert(!(extData.NEG & 0x2));
19953 assert(!(extData.NEG & 0x4));
19954
19955 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19956 if (wf->execMask(lane)) {
19957 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
19958 }
19959 }
19960
19961 sdst.write();
19962 }
19963
19964 Inst_VOP3__V_CMP_T_U16::Inst_VOP3__V_CMP_T_U16(InFmt_VOP3 *iFmt)
19965 : Inst_VOP3(iFmt, "v_cmp_t_u16", true)
19966 {
19967 setFlag(ALU);
19968 } // Inst_VOP3__V_CMP_T_U16
19969
19970 Inst_VOP3__V_CMP_T_U16::~Inst_VOP3__V_CMP_T_U16()
19971 {
19972 } // ~Inst_VOP3__V_CMP_T_U16
19973
19974 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
19975 void
19976 Inst_VOP3__V_CMP_T_U16::execute(GPUDynInstPtr gpuDynInst)
19977 {
19978 Wavefront *wf = gpuDynInst->wavefront();
19979 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
19980
19981 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
19982 if (wf->execMask(lane)) {
19983 sdst.setBit(lane, 1);
19984 }
19985 }
19986
19987 sdst.write();
19988 }
19989
19990 Inst_VOP3__V_CMPX_F_I16::Inst_VOP3__V_CMPX_F_I16(
19991 InFmt_VOP3 *iFmt)
19992 : Inst_VOP3(iFmt, "v_cmpx_f_i16", true)
19993 {
19994 setFlag(ALU);
19995 } // Inst_VOP3__V_CMPX_F_I16
19996
19997 Inst_VOP3__V_CMPX_F_I16::~Inst_VOP3__V_CMPX_F_I16()
19998 {
19999 } // ~Inst_VOP3__V_CMPX_F_I16
20000
20001 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
20002 void
20003 Inst_VOP3__V_CMPX_F_I16::execute(GPUDynInstPtr gpuDynInst)
20004 {
20005 Wavefront *wf = gpuDynInst->wavefront();
20006 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20007
20008 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20009 if (wf->execMask(lane)) {
20010 sdst.setBit(lane, 0);
20011 }
20012 }
20013
20014 wf->execMask() = sdst.rawData();
20015 sdst.write();
20016 }
20017
20018 Inst_VOP3__V_CMPX_LT_I16::Inst_VOP3__V_CMPX_LT_I16(
20019 InFmt_VOP3 *iFmt)
20020 : Inst_VOP3(iFmt, "v_cmpx_lt_i16", true)
20021 {
20022 setFlag(ALU);
20023 } // Inst_VOP3__V_CMPX_LT_I16
20024
20025 Inst_VOP3__V_CMPX_LT_I16::~Inst_VOP3__V_CMPX_LT_I16()
20026 {
20027 } // ~Inst_VOP3__V_CMPX_LT_I16
20028
20029 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20030 void
20031 Inst_VOP3__V_CMPX_LT_I16::execute(GPUDynInstPtr gpuDynInst)
20032 {
20033 Wavefront *wf = gpuDynInst->wavefront();
20034 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20035 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20036 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20037
20038 src0.readSrc();
20039 src1.readSrc();
20040
20041 /**
20042 * input modifiers are supported by FP operations only
20043 */
20044 assert(!(instData.ABS & 0x1));
20045 assert(!(instData.ABS & 0x2));
20046 assert(!(instData.ABS & 0x4));
20047 assert(!(extData.NEG & 0x1));
20048 assert(!(extData.NEG & 0x2));
20049 assert(!(extData.NEG & 0x4));
20050
20051 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20052 if (wf->execMask(lane)) {
20053 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20054 }
20055 }
20056
20057 wf->execMask() = sdst.rawData();
20058 sdst.write();
20059 }
20060
20061 Inst_VOP3__V_CMPX_EQ_I16::Inst_VOP3__V_CMPX_EQ_I16(
20062 InFmt_VOP3 *iFmt)
20063 : Inst_VOP3(iFmt, "v_cmpx_eq_i16", true)
20064 {
20065 setFlag(ALU);
20066 } // Inst_VOP3__V_CMPX_EQ_I16
20067
20068 Inst_VOP3__V_CMPX_EQ_I16::~Inst_VOP3__V_CMPX_EQ_I16()
20069 {
20070 } // ~Inst_VOP3__V_CMPX_EQ_I16
20071
20072 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
20073 void
20074 Inst_VOP3__V_CMPX_EQ_I16::execute(GPUDynInstPtr gpuDynInst)
20075 {
20076 Wavefront *wf = gpuDynInst->wavefront();
20077 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20078 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20079 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20080
20081 src0.readSrc();
20082 src1.readSrc();
20083
20084 /**
20085 * input modifiers are supported by FP operations only
20086 */
20087 assert(!(instData.ABS & 0x1));
20088 assert(!(instData.ABS & 0x2));
20089 assert(!(instData.ABS & 0x4));
20090 assert(!(extData.NEG & 0x1));
20091 assert(!(extData.NEG & 0x2));
20092 assert(!(extData.NEG & 0x4));
20093
20094 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20095 if (wf->execMask(lane)) {
20096 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
20097 }
20098 }
20099
20100 wf->execMask() = sdst.rawData();
20101 sdst.write();
20102 }
20103
20104 Inst_VOP3__V_CMPX_LE_I16::Inst_VOP3__V_CMPX_LE_I16(
20105 InFmt_VOP3 *iFmt)
20106 : Inst_VOP3(iFmt, "v_cmpx_le_i16", true)
20107 {
20108 setFlag(ALU);
20109 } // Inst_VOP3__V_CMPX_LE_I16
20110
20111 Inst_VOP3__V_CMPX_LE_I16::~Inst_VOP3__V_CMPX_LE_I16()
20112 {
20113 } // ~Inst_VOP3__V_CMPX_LE_I16
20114
20115 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
20116 void
20117 Inst_VOP3__V_CMPX_LE_I16::execute(GPUDynInstPtr gpuDynInst)
20118 {
20119 Wavefront *wf = gpuDynInst->wavefront();
20120 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20121 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20122 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20123
20124 src0.readSrc();
20125 src1.readSrc();
20126
20127 /**
20128 * input modifiers are supported by FP operations only
20129 */
20130 assert(!(instData.ABS & 0x1));
20131 assert(!(instData.ABS & 0x2));
20132 assert(!(instData.ABS & 0x4));
20133 assert(!(extData.NEG & 0x1));
20134 assert(!(extData.NEG & 0x2));
20135 assert(!(extData.NEG & 0x4));
20136
20137 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20138 if (wf->execMask(lane)) {
20139 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
20140 }
20141 }
20142
20143 wf->execMask() = sdst.rawData();
20144 sdst.write();
20145 }
20146
20147 Inst_VOP3__V_CMPX_GT_I16::Inst_VOP3__V_CMPX_GT_I16(
20148 InFmt_VOP3 *iFmt)
20149 : Inst_VOP3(iFmt, "v_cmpx_gt_i16", true)
20150 {
20151 setFlag(ALU);
20152 } // Inst_VOP3__V_CMPX_GT_I16
20153
20154 Inst_VOP3__V_CMPX_GT_I16::~Inst_VOP3__V_CMPX_GT_I16()
20155 {
20156 } // ~Inst_VOP3__V_CMPX_GT_I16
20157
20158 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
20159 void
20160 Inst_VOP3__V_CMPX_GT_I16::execute(GPUDynInstPtr gpuDynInst)
20161 {
20162 Wavefront *wf = gpuDynInst->wavefront();
20163 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20164 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20165 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20166
20167 src0.readSrc();
20168 src1.readSrc();
20169
20170 /**
20171 * input modifiers are supported by FP operations only
20172 */
20173 assert(!(instData.ABS & 0x1));
20174 assert(!(instData.ABS & 0x2));
20175 assert(!(instData.ABS & 0x4));
20176 assert(!(extData.NEG & 0x1));
20177 assert(!(extData.NEG & 0x2));
20178 assert(!(extData.NEG & 0x4));
20179
20180 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20181 if (wf->execMask(lane)) {
20182 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
20183 }
20184 }
20185
20186 wf->execMask() = sdst.rawData();
20187 sdst.write();
20188 }
20189
20190 Inst_VOP3__V_CMPX_NE_I16::Inst_VOP3__V_CMPX_NE_I16(
20191 InFmt_VOP3 *iFmt)
20192 : Inst_VOP3(iFmt, "v_cmpx_ne_i16", true)
20193 {
20194 setFlag(ALU);
20195 } // Inst_VOP3__V_CMPX_NE_I16
20196
20197 Inst_VOP3__V_CMPX_NE_I16::~Inst_VOP3__V_CMPX_NE_I16()
20198 {
20199 } // ~Inst_VOP3__V_CMPX_NE_I16
20200
20201 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
20202 void
20203 Inst_VOP3__V_CMPX_NE_I16::execute(GPUDynInstPtr gpuDynInst)
20204 {
20205 Wavefront *wf = gpuDynInst->wavefront();
20206 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20207 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20208 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20209
20210 src0.readSrc();
20211 src1.readSrc();
20212
20213 /**
20214 * input modifiers are supported by FP operations only
20215 */
20216 assert(!(instData.ABS & 0x1));
20217 assert(!(instData.ABS & 0x2));
20218 assert(!(instData.ABS & 0x4));
20219 assert(!(extData.NEG & 0x1));
20220 assert(!(extData.NEG & 0x2));
20221 assert(!(extData.NEG & 0x4));
20222
20223 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20224 if (wf->execMask(lane)) {
20225 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
20226 }
20227 }
20228
20229 wf->execMask() = sdst.rawData();
20230 sdst.write();
20231 }
20232
20233 Inst_VOP3__V_CMPX_GE_I16::Inst_VOP3__V_CMPX_GE_I16(
20234 InFmt_VOP3 *iFmt)
20235 : Inst_VOP3(iFmt, "v_cmpx_ge_i16", true)
20236 {
20237 setFlag(ALU);
20238 } // Inst_VOP3__V_CMPX_GE_I16
20239
20240 Inst_VOP3__V_CMPX_GE_I16::~Inst_VOP3__V_CMPX_GE_I16()
20241 {
20242 } // ~Inst_VOP3__V_CMPX_GE_I16
20243
20244 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
20245 void
20246 Inst_VOP3__V_CMPX_GE_I16::execute(GPUDynInstPtr gpuDynInst)
20247 {
20248 Wavefront *wf = gpuDynInst->wavefront();
20249 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
20250 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
20251 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20252
20253 src0.readSrc();
20254 src1.readSrc();
20255
20256 /**
20257 * input modifiers are supported by FP operations only
20258 */
20259 assert(!(instData.ABS & 0x1));
20260 assert(!(instData.ABS & 0x2));
20261 assert(!(instData.ABS & 0x4));
20262 assert(!(extData.NEG & 0x1));
20263 assert(!(extData.NEG & 0x2));
20264 assert(!(extData.NEG & 0x4));
20265
20266 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20267 if (wf->execMask(lane)) {
20268 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
20269 }
20270 }
20271
20272 wf->execMask() = sdst.rawData();
20273 sdst.write();
20274 }
20275
20276 Inst_VOP3__V_CMPX_T_I16::Inst_VOP3__V_CMPX_T_I16(
20277 InFmt_VOP3 *iFmt)
20278 : Inst_VOP3(iFmt, "v_cmpx_t_i16", true)
20279 {
20280 setFlag(ALU);
20281 } // Inst_VOP3__V_CMPX_T_I16
20282
20283 Inst_VOP3__V_CMPX_T_I16::~Inst_VOP3__V_CMPX_T_I16()
20284 {
20285 } // ~Inst_VOP3__V_CMPX_T_I16
20286
20287 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
20288 void
20289 Inst_VOP3__V_CMPX_T_I16::execute(GPUDynInstPtr gpuDynInst)
20290 {
20291 Wavefront *wf = gpuDynInst->wavefront();
20292 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20293
20294 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20295 if (wf->execMask(lane)) {
20296 sdst.setBit(lane, 1);
20297 }
20298 }
20299
20300 wf->execMask() = sdst.rawData();
20301 sdst.write();
20302 }
20303
20304 Inst_VOP3__V_CMPX_F_U16::Inst_VOP3__V_CMPX_F_U16(
20305 InFmt_VOP3 *iFmt)
20306 : Inst_VOP3(iFmt, "v_cmpx_f_u16", true)
20307 {
20308 setFlag(ALU);
20309 } // Inst_VOP3__V_CMPX_F_U16
20310
20311 Inst_VOP3__V_CMPX_F_U16::~Inst_VOP3__V_CMPX_F_U16()
20312 {
20313 } // ~Inst_VOP3__V_CMPX_F_U16
20314
20315 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
20316 void
20317 Inst_VOP3__V_CMPX_F_U16::execute(GPUDynInstPtr gpuDynInst)
20318 {
20319 Wavefront *wf = gpuDynInst->wavefront();
20320 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20321
20322 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20323 if (wf->execMask(lane)) {
20324 sdst.setBit(lane, 0);
20325 }
20326 }
20327
20328 wf->execMask() = sdst.rawData();
20329 sdst.write();
20330 }
20331
20332 Inst_VOP3__V_CMPX_LT_U16::Inst_VOP3__V_CMPX_LT_U16(
20333 InFmt_VOP3 *iFmt)
20334 : Inst_VOP3(iFmt, "v_cmpx_lt_u16", true)
20335 {
20336 setFlag(ALU);
20337 } // Inst_VOP3__V_CMPX_LT_U16
20338
20339 Inst_VOP3__V_CMPX_LT_U16::~Inst_VOP3__V_CMPX_LT_U16()
20340 {
20341 } // ~Inst_VOP3__V_CMPX_LT_U16
20342
20343 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20344 void
20345 Inst_VOP3__V_CMPX_LT_U16::execute(GPUDynInstPtr gpuDynInst)
20346 {
20347 Wavefront *wf = gpuDynInst->wavefront();
20348 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20349 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20350 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20351
20352 src0.readSrc();
20353 src1.readSrc();
20354
20355 /**
20356 * input modifiers are supported by FP operations only
20357 */
20358 assert(!(instData.ABS & 0x1));
20359 assert(!(instData.ABS & 0x2));
20360 assert(!(instData.ABS & 0x4));
20361 assert(!(extData.NEG & 0x1));
20362 assert(!(extData.NEG & 0x2));
20363 assert(!(extData.NEG & 0x4));
20364
20365 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20366 if (wf->execMask(lane)) {
20367 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20368 }
20369 }
20370
20371 wf->execMask() = sdst.rawData();
20372 sdst.write();
20373 }
20374
20375 Inst_VOP3__V_CMPX_EQ_U16::Inst_VOP3__V_CMPX_EQ_U16(
20376 InFmt_VOP3 *iFmt)
20377 : Inst_VOP3(iFmt, "v_cmpx_eq_u16", true)
20378 {
20379 setFlag(ALU);
20380 } // Inst_VOP3__V_CMPX_EQ_U16
20381
20382 Inst_VOP3__V_CMPX_EQ_U16::~Inst_VOP3__V_CMPX_EQ_U16()
20383 {
20384 } // ~Inst_VOP3__V_CMPX_EQ_U16
20385
20386 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
20387 void
20388 Inst_VOP3__V_CMPX_EQ_U16::execute(GPUDynInstPtr gpuDynInst)
20389 {
20390 Wavefront *wf = gpuDynInst->wavefront();
20391 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20392 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20393 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20394
20395 src0.readSrc();
20396 src1.readSrc();
20397
20398 /**
20399 * input modifiers are supported by FP operations only
20400 */
20401 assert(!(instData.ABS & 0x1));
20402 assert(!(instData.ABS & 0x2));
20403 assert(!(instData.ABS & 0x4));
20404 assert(!(extData.NEG & 0x1));
20405 assert(!(extData.NEG & 0x2));
20406 assert(!(extData.NEG & 0x4));
20407
20408 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20409 if (wf->execMask(lane)) {
20410 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
20411 }
20412 }
20413
20414 wf->execMask() = sdst.rawData();
20415 sdst.write();
20416 }
20417
20418 Inst_VOP3__V_CMPX_LE_U16::Inst_VOP3__V_CMPX_LE_U16(
20419 InFmt_VOP3 *iFmt)
20420 : Inst_VOP3(iFmt, "v_cmpx_le_u16", true)
20421 {
20422 setFlag(ALU);
20423 } // Inst_VOP3__V_CMPX_LE_U16
20424
20425 Inst_VOP3__V_CMPX_LE_U16::~Inst_VOP3__V_CMPX_LE_U16()
20426 {
20427 } // ~Inst_VOP3__V_CMPX_LE_U16
20428
20429 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
20430 void
20431 Inst_VOP3__V_CMPX_LE_U16::execute(GPUDynInstPtr gpuDynInst)
20432 {
20433 Wavefront *wf = gpuDynInst->wavefront();
20434 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20435 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20436 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20437
20438 src0.readSrc();
20439 src1.readSrc();
20440
20441 /**
20442 * input modifiers are supported by FP operations only
20443 */
20444 assert(!(instData.ABS & 0x1));
20445 assert(!(instData.ABS & 0x2));
20446 assert(!(instData.ABS & 0x4));
20447 assert(!(extData.NEG & 0x1));
20448 assert(!(extData.NEG & 0x2));
20449 assert(!(extData.NEG & 0x4));
20450
20451 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20452 if (wf->execMask(lane)) {
20453 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
20454 }
20455 }
20456
20457 wf->execMask() = sdst.rawData();
20458 sdst.write();
20459 }
20460
20461 Inst_VOP3__V_CMPX_GT_U16::Inst_VOP3__V_CMPX_GT_U16(
20462 InFmt_VOP3 *iFmt)
20463 : Inst_VOP3(iFmt, "v_cmpx_gt_u16", true)
20464 {
20465 setFlag(ALU);
20466 } // Inst_VOP3__V_CMPX_GT_U16
20467
20468 Inst_VOP3__V_CMPX_GT_U16::~Inst_VOP3__V_CMPX_GT_U16()
20469 {
20470 } // ~Inst_VOP3__V_CMPX_GT_U16
20471
20472 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
20473 void
20474 Inst_VOP3__V_CMPX_GT_U16::execute(GPUDynInstPtr gpuDynInst)
20475 {
20476 Wavefront *wf = gpuDynInst->wavefront();
20477 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20478 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20479 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20480
20481 src0.readSrc();
20482 src1.readSrc();
20483
20484 /**
20485 * input modifiers are supported by FP operations only
20486 */
20487 assert(!(instData.ABS & 0x1));
20488 assert(!(instData.ABS & 0x2));
20489 assert(!(instData.ABS & 0x4));
20490 assert(!(extData.NEG & 0x1));
20491 assert(!(extData.NEG & 0x2));
20492 assert(!(extData.NEG & 0x4));
20493
20494 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20495 if (wf->execMask(lane)) {
20496 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
20497 }
20498 }
20499
20500 wf->execMask() = sdst.rawData();
20501 sdst.write();
20502 }
20503
20504 Inst_VOP3__V_CMPX_NE_U16::Inst_VOP3__V_CMPX_NE_U16(
20505 InFmt_VOP3 *iFmt)
20506 : Inst_VOP3(iFmt, "v_cmpx_ne_u16", true)
20507 {
20508 setFlag(ALU);
20509 } // Inst_VOP3__V_CMPX_NE_U16
20510
20511 Inst_VOP3__V_CMPX_NE_U16::~Inst_VOP3__V_CMPX_NE_U16()
20512 {
20513 } // ~Inst_VOP3__V_CMPX_NE_U16
20514
20515 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
20516 void
20517 Inst_VOP3__V_CMPX_NE_U16::execute(GPUDynInstPtr gpuDynInst)
20518 {
20519 Wavefront *wf = gpuDynInst->wavefront();
20520 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20521 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20522 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20523
20524 src0.readSrc();
20525 src1.readSrc();
20526
20527 /**
20528 * input modifiers are supported by FP operations only
20529 */
20530 assert(!(instData.ABS & 0x1));
20531 assert(!(instData.ABS & 0x2));
20532 assert(!(instData.ABS & 0x4));
20533 assert(!(extData.NEG & 0x1));
20534 assert(!(extData.NEG & 0x2));
20535 assert(!(extData.NEG & 0x4));
20536
20537 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20538 if (wf->execMask(lane)) {
20539 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
20540 }
20541 }
20542
20543 wf->execMask() = sdst.rawData();
20544 sdst.write();
20545 }
20546
20547 Inst_VOP3__V_CMPX_GE_U16::Inst_VOP3__V_CMPX_GE_U16(
20548 InFmt_VOP3 *iFmt)
20549 : Inst_VOP3(iFmt, "v_cmpx_ge_u16", true)
20550 {
20551 setFlag(ALU);
20552 } // Inst_VOP3__V_CMPX_GE_U16
20553
20554 Inst_VOP3__V_CMPX_GE_U16::~Inst_VOP3__V_CMPX_GE_U16()
20555 {
20556 } // ~Inst_VOP3__V_CMPX_GE_U16
20557
20558 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
20559 void
20560 Inst_VOP3__V_CMPX_GE_U16::execute(GPUDynInstPtr gpuDynInst)
20561 {
20562 Wavefront *wf = gpuDynInst->wavefront();
20563 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
20564 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
20565 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20566
20567 src0.readSrc();
20568 src1.readSrc();
20569
20570 /**
20571 * input modifiers are supported by FP operations only
20572 */
20573 assert(!(instData.ABS & 0x1));
20574 assert(!(instData.ABS & 0x2));
20575 assert(!(instData.ABS & 0x4));
20576 assert(!(extData.NEG & 0x1));
20577 assert(!(extData.NEG & 0x2));
20578 assert(!(extData.NEG & 0x4));
20579
20580 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20581 if (wf->execMask(lane)) {
20582 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
20583 }
20584 }
20585
20586 wf->execMask() = sdst.rawData();
20587 sdst.write();
20588 }
20589
20590 Inst_VOP3__V_CMPX_T_U16::Inst_VOP3__V_CMPX_T_U16(
20591 InFmt_VOP3 *iFmt)
20592 : Inst_VOP3(iFmt, "v_cmpx_t_u16", true)
20593 {
20594 setFlag(ALU);
20595 } // Inst_VOP3__V_CMPX_T_U16
20596
20597 Inst_VOP3__V_CMPX_T_U16::~Inst_VOP3__V_CMPX_T_U16()
20598 {
20599 } // ~Inst_VOP3__V_CMPX_T_U16
20600
20601 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
20602 void
20603 Inst_VOP3__V_CMPX_T_U16::execute(GPUDynInstPtr gpuDynInst)
20604 {
20605 Wavefront *wf = gpuDynInst->wavefront();
20606 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20607
20608 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20609 if (wf->execMask(lane)) {
20610 sdst.setBit(lane, 1);
20611 }
20612 }
20613
20614 wf->execMask() = sdst.rawData();
20615 sdst.write();
20616 }
20617
20618 Inst_VOP3__V_CMP_F_I32::Inst_VOP3__V_CMP_F_I32(InFmt_VOP3 *iFmt)
20619 : Inst_VOP3(iFmt, "v_cmp_f_i32", true)
20620 {
20621 setFlag(ALU);
20622 } // Inst_VOP3__V_CMP_F_I32
20623
20624 Inst_VOP3__V_CMP_F_I32::~Inst_VOP3__V_CMP_F_I32()
20625 {
20626 } // ~Inst_VOP3__V_CMP_F_I32
20627
20628 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
20629 void
20630 Inst_VOP3__V_CMP_F_I32::execute(GPUDynInstPtr gpuDynInst)
20631 {
20632 Wavefront *wf = gpuDynInst->wavefront();
20633 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20634
20635 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20636 if (wf->execMask(lane)) {
20637 sdst.setBit(lane, 0);
20638 }
20639 }
20640
20641 wf->execMask() = sdst.rawData();
20642 sdst.write();
20643 }
20644
20645 Inst_VOP3__V_CMP_LT_I32::Inst_VOP3__V_CMP_LT_I32(
20646 InFmt_VOP3 *iFmt)
20647 : Inst_VOP3(iFmt, "v_cmp_lt_i32", true)
20648 {
20649 setFlag(ALU);
20650 } // Inst_VOP3__V_CMP_LT_I32
20651
20652 Inst_VOP3__V_CMP_LT_I32::~Inst_VOP3__V_CMP_LT_I32()
20653 {
20654 } // ~Inst_VOP3__V_CMP_LT_I32
20655
20656 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20657 void
20658 Inst_VOP3__V_CMP_LT_I32::execute(GPUDynInstPtr gpuDynInst)
20659 {
20660 Wavefront *wf = gpuDynInst->wavefront();
20661 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20662 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20663 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20664
20665 src0.readSrc();
20666 src1.readSrc();
20667
20668 /**
20669 * input modifiers are supported by FP operations only
20670 */
20671 assert(!(instData.ABS & 0x1));
20672 assert(!(instData.ABS & 0x2));
20673 assert(!(instData.ABS & 0x4));
20674 assert(!(extData.NEG & 0x1));
20675 assert(!(extData.NEG & 0x2));
20676 assert(!(extData.NEG & 0x4));
20677
20678 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20679 if (wf->execMask(lane)) {
20680 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20681 }
20682 }
20683
20684 sdst.write();
20685 }
20686
20687 Inst_VOP3__V_CMP_EQ_I32::Inst_VOP3__V_CMP_EQ_I32(
20688 InFmt_VOP3 *iFmt)
20689 : Inst_VOP3(iFmt, "v_cmp_eq_i32", true)
20690 {
20691 setFlag(ALU);
20692 } // Inst_VOP3__V_CMP_EQ_I32
20693
20694 Inst_VOP3__V_CMP_EQ_I32::~Inst_VOP3__V_CMP_EQ_I32()
20695 {
20696 } // ~Inst_VOP3__V_CMP_EQ_I32
20697
20698 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
20699 void
20700 Inst_VOP3__V_CMP_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
20701 {
20702 Wavefront *wf = gpuDynInst->wavefront();
20703 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20704 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20705 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20706
20707 src0.readSrc();
20708 src1.readSrc();
20709
20710 /**
20711 * input modifiers are supported by FP operations only
20712 */
20713 assert(!(instData.ABS & 0x1));
20714 assert(!(instData.ABS & 0x2));
20715 assert(!(instData.ABS & 0x4));
20716 assert(!(extData.NEG & 0x1));
20717 assert(!(extData.NEG & 0x2));
20718 assert(!(extData.NEG & 0x4));
20719
20720 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20721 if (wf->execMask(lane)) {
20722 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
20723 }
20724 }
20725
20726 sdst.write();
20727 }
20728
20729 Inst_VOP3__V_CMP_LE_I32::Inst_VOP3__V_CMP_LE_I32(
20730 InFmt_VOP3 *iFmt)
20731 : Inst_VOP3(iFmt, "v_cmp_le_i32", true)
20732 {
20733 setFlag(ALU);
20734 } // Inst_VOP3__V_CMP_LE_I32
20735
20736 Inst_VOP3__V_CMP_LE_I32::~Inst_VOP3__V_CMP_LE_I32()
20737 {
20738 } // ~Inst_VOP3__V_CMP_LE_I32
20739
20740 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
20741 void
20742 Inst_VOP3__V_CMP_LE_I32::execute(GPUDynInstPtr gpuDynInst)
20743 {
20744 Wavefront *wf = gpuDynInst->wavefront();
20745 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20746 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20747 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20748
20749 src0.readSrc();
20750 src1.readSrc();
20751
20752 /**
20753 * input modifiers are supported by FP operations only
20754 */
20755 assert(!(instData.ABS & 0x1));
20756 assert(!(instData.ABS & 0x2));
20757 assert(!(instData.ABS & 0x4));
20758 assert(!(extData.NEG & 0x1));
20759 assert(!(extData.NEG & 0x2));
20760 assert(!(extData.NEG & 0x4));
20761
20762 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20763 if (wf->execMask(lane)) {
20764 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
20765 }
20766 }
20767
20768 sdst.write();
20769 }
20770
20771 Inst_VOP3__V_CMP_GT_I32::Inst_VOP3__V_CMP_GT_I32(
20772 InFmt_VOP3 *iFmt)
20773 : Inst_VOP3(iFmt, "v_cmp_gt_i32", true)
20774 {
20775 setFlag(ALU);
20776 } // Inst_VOP3__V_CMP_GT_I32
20777
20778 Inst_VOP3__V_CMP_GT_I32::~Inst_VOP3__V_CMP_GT_I32()
20779 {
20780 } // ~Inst_VOP3__V_CMP_GT_I32
20781
20782 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
20783 void
20784 Inst_VOP3__V_CMP_GT_I32::execute(GPUDynInstPtr gpuDynInst)
20785 {
20786 Wavefront *wf = gpuDynInst->wavefront();
20787 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20788 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20789 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20790
20791 src0.readSrc();
20792 src1.readSrc();
20793
20794 /**
20795 * input modifiers are supported by FP operations only
20796 */
20797 assert(!(instData.ABS & 0x1));
20798 assert(!(instData.ABS & 0x2));
20799 assert(!(instData.ABS & 0x4));
20800 assert(!(extData.NEG & 0x1));
20801 assert(!(extData.NEG & 0x2));
20802 assert(!(extData.NEG & 0x4));
20803
20804 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20805 if (wf->execMask(lane)) {
20806 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
20807 }
20808 }
20809
20810 sdst.write();
20811 }
20812
20813 Inst_VOP3__V_CMP_NE_I32::Inst_VOP3__V_CMP_NE_I32(
20814 InFmt_VOP3 *iFmt)
20815 : Inst_VOP3(iFmt, "v_cmp_ne_i32", true)
20816 {
20817 setFlag(ALU);
20818 } // Inst_VOP3__V_CMP_NE_I32
20819
20820 Inst_VOP3__V_CMP_NE_I32::~Inst_VOP3__V_CMP_NE_I32()
20821 {
20822 } // ~Inst_VOP3__V_CMP_NE_I32
20823
20824 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
20825 void
20826 Inst_VOP3__V_CMP_NE_I32::execute(GPUDynInstPtr gpuDynInst)
20827 {
20828 Wavefront *wf = gpuDynInst->wavefront();
20829 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20830 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20831 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20832
20833 src0.readSrc();
20834 src1.readSrc();
20835
20836 /**
20837 * input modifiers are supported by FP operations only
20838 */
20839 assert(!(instData.ABS & 0x1));
20840 assert(!(instData.ABS & 0x2));
20841 assert(!(instData.ABS & 0x4));
20842 assert(!(extData.NEG & 0x1));
20843 assert(!(extData.NEG & 0x2));
20844 assert(!(extData.NEG & 0x4));
20845
20846 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20847 if (wf->execMask(lane)) {
20848 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
20849 }
20850 }
20851
20852 sdst.write();
20853 }
20854
20855 Inst_VOP3__V_CMP_GE_I32::Inst_VOP3__V_CMP_GE_I32(
20856 InFmt_VOP3 *iFmt)
20857 : Inst_VOP3(iFmt, "v_cmp_ge_i32", true)
20858 {
20859 setFlag(ALU);
20860 } // Inst_VOP3__V_CMP_GE_I32
20861
20862 Inst_VOP3__V_CMP_GE_I32::~Inst_VOP3__V_CMP_GE_I32()
20863 {
20864 } // ~Inst_VOP3__V_CMP_GE_I32
20865
20866 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
20867 void
20868 Inst_VOP3__V_CMP_GE_I32::execute(GPUDynInstPtr gpuDynInst)
20869 {
20870 Wavefront *wf = gpuDynInst->wavefront();
20871 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
20872 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
20873 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20874
20875 src0.readSrc();
20876 src1.readSrc();
20877
20878 /**
20879 * input modifiers are supported by FP operations only
20880 */
20881 assert(!(instData.ABS & 0x1));
20882 assert(!(instData.ABS & 0x2));
20883 assert(!(instData.ABS & 0x4));
20884 assert(!(extData.NEG & 0x1));
20885 assert(!(extData.NEG & 0x2));
20886 assert(!(extData.NEG & 0x4));
20887
20888 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20889 if (wf->execMask(lane)) {
20890 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
20891 }
20892 }
20893
20894 sdst.write();
20895 }
20896
20897 Inst_VOP3__V_CMP_T_I32::Inst_VOP3__V_CMP_T_I32(InFmt_VOP3 *iFmt)
20898 : Inst_VOP3(iFmt, "v_cmp_t_i32", true)
20899 {
20900 setFlag(ALU);
20901 } // Inst_VOP3__V_CMP_T_I32
20902
20903 Inst_VOP3__V_CMP_T_I32::~Inst_VOP3__V_CMP_T_I32()
20904 {
20905 } // ~Inst_VOP3__V_CMP_T_I32
20906
20907 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
20908 void
20909 Inst_VOP3__V_CMP_T_I32::execute(GPUDynInstPtr gpuDynInst)
20910 {
20911 Wavefront *wf = gpuDynInst->wavefront();
20912 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20913
20914 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20915 if (wf->execMask(lane)) {
20916 sdst.setBit(lane, 1);
20917 }
20918 }
20919
20920 sdst.write();
20921 }
20922
20923 Inst_VOP3__V_CMP_F_U32::Inst_VOP3__V_CMP_F_U32(InFmt_VOP3 *iFmt)
20924 : Inst_VOP3(iFmt, "v_cmp_f_u32", true)
20925 {
20926 setFlag(ALU);
20927 } // Inst_VOP3__V_CMP_F_U32
20928
20929 Inst_VOP3__V_CMP_F_U32::~Inst_VOP3__V_CMP_F_U32()
20930 {
20931 } // ~Inst_VOP3__V_CMP_F_U32
20932
20933 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
20934 void
20935 Inst_VOP3__V_CMP_F_U32::execute(GPUDynInstPtr gpuDynInst)
20936 {
20937 Wavefront *wf = gpuDynInst->wavefront();
20938 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20939
20940 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20941 if (wf->execMask(lane)) {
20942 sdst.setBit(lane, 0);
20943 }
20944 }
20945
20946 sdst.write();
20947 }
20948
20949 Inst_VOP3__V_CMP_LT_U32::Inst_VOP3__V_CMP_LT_U32(
20950 InFmt_VOP3 *iFmt)
20951 : Inst_VOP3(iFmt, "v_cmp_lt_u32", true)
20952 {
20953 setFlag(ALU);
20954 } // Inst_VOP3__V_CMP_LT_U32
20955
20956 Inst_VOP3__V_CMP_LT_U32::~Inst_VOP3__V_CMP_LT_U32()
20957 {
20958 } // ~Inst_VOP3__V_CMP_LT_U32
20959
20960 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
20961 void
20962 Inst_VOP3__V_CMP_LT_U32::execute(GPUDynInstPtr gpuDynInst)
20963 {
20964 Wavefront *wf = gpuDynInst->wavefront();
20965 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
20966 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
20967 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
20968
20969 src0.readSrc();
20970 src1.readSrc();
20971
20972 /**
20973 * input modifiers are supported by FP operations only
20974 */
20975 assert(!(instData.ABS & 0x1));
20976 assert(!(instData.ABS & 0x2));
20977 assert(!(instData.ABS & 0x4));
20978 assert(!(extData.NEG & 0x1));
20979 assert(!(extData.NEG & 0x2));
20980 assert(!(extData.NEG & 0x4));
20981
20982 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
20983 if (wf->execMask(lane)) {
20984 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
20985 }
20986 }
20987
20988 sdst.write();
20989 }
20990
20991 Inst_VOP3__V_CMP_EQ_U32::Inst_VOP3__V_CMP_EQ_U32(
20992 InFmt_VOP3 *iFmt)
20993 : Inst_VOP3(iFmt, "v_cmp_eq_u32", true)
20994 {
20995 setFlag(ALU);
20996 } // Inst_VOP3__V_CMP_EQ_U32
20997
20998 Inst_VOP3__V_CMP_EQ_U32::~Inst_VOP3__V_CMP_EQ_U32()
20999 {
21000 } // ~Inst_VOP3__V_CMP_EQ_U32
21001
21002 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21003 void
21004 Inst_VOP3__V_CMP_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
21005 {
21006 Wavefront *wf = gpuDynInst->wavefront();
21007 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21008 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21009 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21010
21011 src0.readSrc();
21012 src1.readSrc();
21013
21014 /**
21015 * input modifiers are supported by FP operations only
21016 */
21017 assert(!(instData.ABS & 0x1));
21018 assert(!(instData.ABS & 0x2));
21019 assert(!(instData.ABS & 0x4));
21020 assert(!(extData.NEG & 0x1));
21021 assert(!(extData.NEG & 0x2));
21022 assert(!(extData.NEG & 0x4));
21023
21024 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21025 if (wf->execMask(lane)) {
21026 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21027 }
21028 }
21029
21030 sdst.write();
21031 }
21032
21033 Inst_VOP3__V_CMP_LE_U32::Inst_VOP3__V_CMP_LE_U32(
21034 InFmt_VOP3 *iFmt)
21035 : Inst_VOP3(iFmt, "v_cmp_le_u32", true)
21036 {
21037 setFlag(ALU);
21038 } // Inst_VOP3__V_CMP_LE_U32
21039
21040 Inst_VOP3__V_CMP_LE_U32::~Inst_VOP3__V_CMP_LE_U32()
21041 {
21042 } // ~Inst_VOP3__V_CMP_LE_U32
21043
21044 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21045 void
21046 Inst_VOP3__V_CMP_LE_U32::execute(GPUDynInstPtr gpuDynInst)
21047 {
21048 Wavefront *wf = gpuDynInst->wavefront();
21049 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21050 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21051 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21052
21053 src0.readSrc();
21054 src1.readSrc();
21055
21056 /**
21057 * input modifiers are supported by FP operations only
21058 */
21059 assert(!(instData.ABS & 0x1));
21060 assert(!(instData.ABS & 0x2));
21061 assert(!(instData.ABS & 0x4));
21062 assert(!(extData.NEG & 0x1));
21063 assert(!(extData.NEG & 0x2));
21064 assert(!(extData.NEG & 0x4));
21065
21066 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21067 if (wf->execMask(lane)) {
21068 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
21069 }
21070 }
21071
21072 sdst.write();
21073 }
21074
21075 Inst_VOP3__V_CMP_GT_U32::Inst_VOP3__V_CMP_GT_U32(
21076 InFmt_VOP3 *iFmt)
21077 : Inst_VOP3(iFmt, "v_cmp_gt_u32", true)
21078 {
21079 setFlag(ALU);
21080 } // Inst_VOP3__V_CMP_GT_U32
21081
21082 Inst_VOP3__V_CMP_GT_U32::~Inst_VOP3__V_CMP_GT_U32()
21083 {
21084 } // ~Inst_VOP3__V_CMP_GT_U32
21085
21086 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21087 void
21088 Inst_VOP3__V_CMP_GT_U32::execute(GPUDynInstPtr gpuDynInst)
21089 {
21090 Wavefront *wf = gpuDynInst->wavefront();
21091 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21092 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21093 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21094
21095 src0.readSrc();
21096 src1.readSrc();
21097
21098 /**
21099 * input modifiers are supported by FP operations only
21100 */
21101 assert(!(instData.ABS & 0x1));
21102 assert(!(instData.ABS & 0x2));
21103 assert(!(instData.ABS & 0x4));
21104 assert(!(extData.NEG & 0x1));
21105 assert(!(extData.NEG & 0x2));
21106 assert(!(extData.NEG & 0x4));
21107
21108 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21109 if (wf->execMask(lane)) {
21110 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
21111 }
21112 }
21113
21114 sdst.write();
21115 }
21116
21117 Inst_VOP3__V_CMP_NE_U32::Inst_VOP3__V_CMP_NE_U32(
21118 InFmt_VOP3 *iFmt)
21119 : Inst_VOP3(iFmt, "v_cmp_ne_u32", true)
21120 {
21121 setFlag(ALU);
21122 } // Inst_VOP3__V_CMP_NE_U32
21123
21124 Inst_VOP3__V_CMP_NE_U32::~Inst_VOP3__V_CMP_NE_U32()
21125 {
21126 } // ~Inst_VOP3__V_CMP_NE_U32
21127
21128 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21129 void
21130 Inst_VOP3__V_CMP_NE_U32::execute(GPUDynInstPtr gpuDynInst)
21131 {
21132 Wavefront *wf = gpuDynInst->wavefront();
21133 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21134 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21135 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21136
21137 src0.readSrc();
21138 src1.readSrc();
21139
21140 /**
21141 * input modifiers are supported by FP operations only
21142 */
21143 assert(!(instData.ABS & 0x1));
21144 assert(!(instData.ABS & 0x2));
21145 assert(!(instData.ABS & 0x4));
21146 assert(!(extData.NEG & 0x1));
21147 assert(!(extData.NEG & 0x2));
21148 assert(!(extData.NEG & 0x4));
21149
21150 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21151 if (wf->execMask(lane)) {
21152 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
21153 }
21154 }
21155
21156 sdst.write();
21157 }
21158
21159 Inst_VOP3__V_CMP_GE_U32::Inst_VOP3__V_CMP_GE_U32(
21160 InFmt_VOP3 *iFmt)
21161 : Inst_VOP3(iFmt, "v_cmp_ge_u32", true)
21162 {
21163 setFlag(ALU);
21164 } // Inst_VOP3__V_CMP_GE_U32
21165
21166 Inst_VOP3__V_CMP_GE_U32::~Inst_VOP3__V_CMP_GE_U32()
21167 {
21168 } // ~Inst_VOP3__V_CMP_GE_U32
21169
21170 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21171 void
21172 Inst_VOP3__V_CMP_GE_U32::execute(GPUDynInstPtr gpuDynInst)
21173 {
21174 Wavefront *wf = gpuDynInst->wavefront();
21175 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21176 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21177 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21178
21179 src0.readSrc();
21180 src1.readSrc();
21181
21182 /**
21183 * input modifiers are supported by FP operations only
21184 */
21185 assert(!(instData.ABS & 0x1));
21186 assert(!(instData.ABS & 0x2));
21187 assert(!(instData.ABS & 0x4));
21188 assert(!(extData.NEG & 0x1));
21189 assert(!(extData.NEG & 0x2));
21190 assert(!(extData.NEG & 0x4));
21191
21192 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21193 if (wf->execMask(lane)) {
21194 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
21195 }
21196 }
21197
21198 sdst.write();
21199 }
21200
21201 Inst_VOP3__V_CMP_T_U32::Inst_VOP3__V_CMP_T_U32(InFmt_VOP3 *iFmt)
21202 : Inst_VOP3(iFmt, "v_cmp_t_u32", true)
21203 {
21204 setFlag(ALU);
21205 } // Inst_VOP3__V_CMP_T_U32
21206
21207 Inst_VOP3__V_CMP_T_U32::~Inst_VOP3__V_CMP_T_U32()
21208 {
21209 } // ~Inst_VOP3__V_CMP_T_U32
21210
21211 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
21212 void
21213 Inst_VOP3__V_CMP_T_U32::execute(GPUDynInstPtr gpuDynInst)
21214 {
21215 Wavefront *wf = gpuDynInst->wavefront();
21216 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21217
21218 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21219 if (wf->execMask(lane)) {
21220 sdst.setBit(lane, 1);
21221 }
21222 }
21223
21224 sdst.write();
21225 }
21226
21227 Inst_VOP3__V_CMPX_F_I32::Inst_VOP3__V_CMPX_F_I32(
21228 InFmt_VOP3 *iFmt)
21229 : Inst_VOP3(iFmt, "v_cmpx_f_i32", true)
21230 {
21231 setFlag(ALU);
21232 } // Inst_VOP3__V_CMPX_F_I32
21233
21234 Inst_VOP3__V_CMPX_F_I32::~Inst_VOP3__V_CMPX_F_I32()
21235 {
21236 } // ~Inst_VOP3__V_CMPX_F_I32
21237
21238 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
21239 void
21240 Inst_VOP3__V_CMPX_F_I32::execute(GPUDynInstPtr gpuDynInst)
21241 {
21242 Wavefront *wf = gpuDynInst->wavefront();
21243 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21244
21245 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21246 if (wf->execMask(lane)) {
21247 sdst.setBit(lane, 0);
21248 }
21249 }
21250
21251 wf->execMask() = sdst.rawData();
21252 sdst.write();
21253 }
21254
21255 Inst_VOP3__V_CMPX_LT_I32::Inst_VOP3__V_CMPX_LT_I32(
21256 InFmt_VOP3 *iFmt)
21257 : Inst_VOP3(iFmt, "v_cmpx_lt_i32", true)
21258 {
21259 setFlag(ALU);
21260 } // Inst_VOP3__V_CMPX_LT_I32
21261
21262 Inst_VOP3__V_CMPX_LT_I32::~Inst_VOP3__V_CMPX_LT_I32()
21263 {
21264 } // ~Inst_VOP3__V_CMPX_LT_I32
21265
21266 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
21267 void
21268 Inst_VOP3__V_CMPX_LT_I32::execute(GPUDynInstPtr gpuDynInst)
21269 {
21270 Wavefront *wf = gpuDynInst->wavefront();
21271 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21272 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21273 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21274
21275 src0.readSrc();
21276 src1.readSrc();
21277
21278 /**
21279 * input modifiers are supported by FP operations only
21280 */
21281 assert(!(instData.ABS & 0x1));
21282 assert(!(instData.ABS & 0x2));
21283 assert(!(instData.ABS & 0x4));
21284 assert(!(extData.NEG & 0x1));
21285 assert(!(extData.NEG & 0x2));
21286 assert(!(extData.NEG & 0x4));
21287
21288 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21289 if (wf->execMask(lane)) {
21290 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
21291 }
21292 }
21293
21294 wf->execMask() = sdst.rawData();
21295 sdst.write();
21296 }
21297
21298 Inst_VOP3__V_CMPX_EQ_I32::Inst_VOP3__V_CMPX_EQ_I32(
21299 InFmt_VOP3 *iFmt)
21300 : Inst_VOP3(iFmt, "v_cmpx_eq_i32", true)
21301 {
21302 setFlag(ALU);
21303 } // Inst_VOP3__V_CMPX_EQ_I32
21304
21305 Inst_VOP3__V_CMPX_EQ_I32::~Inst_VOP3__V_CMPX_EQ_I32()
21306 {
21307 } // ~Inst_VOP3__V_CMPX_EQ_I32
21308
21309 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21310 void
21311 Inst_VOP3__V_CMPX_EQ_I32::execute(GPUDynInstPtr gpuDynInst)
21312 {
21313 Wavefront *wf = gpuDynInst->wavefront();
21314 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21315 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21316 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21317
21318 src0.readSrc();
21319 src1.readSrc();
21320
21321 /**
21322 * input modifiers are supported by FP operations only
21323 */
21324 assert(!(instData.ABS & 0x1));
21325 assert(!(instData.ABS & 0x2));
21326 assert(!(instData.ABS & 0x4));
21327 assert(!(extData.NEG & 0x1));
21328 assert(!(extData.NEG & 0x2));
21329 assert(!(extData.NEG & 0x4));
21330
21331 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21332 if (wf->execMask(lane)) {
21333 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21334 }
21335 }
21336
21337 wf->execMask() = sdst.rawData();
21338 sdst.write();
21339 }
21340
21341 Inst_VOP3__V_CMPX_LE_I32::Inst_VOP3__V_CMPX_LE_I32(
21342 InFmt_VOP3 *iFmt)
21343 : Inst_VOP3(iFmt, "v_cmpx_le_i32", true)
21344 {
21345 setFlag(ALU);
21346 } // Inst_VOP3__V_CMPX_LE_I32
21347
21348 Inst_VOP3__V_CMPX_LE_I32::~Inst_VOP3__V_CMPX_LE_I32()
21349 {
21350 } // ~Inst_VOP3__V_CMPX_LE_I32
21351
21352 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21353 void
21354 Inst_VOP3__V_CMPX_LE_I32::execute(GPUDynInstPtr gpuDynInst)
21355 {
21356 Wavefront *wf = gpuDynInst->wavefront();
21357 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21358 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21359 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21360
21361 src0.readSrc();
21362 src1.readSrc();
21363
21364 /**
21365 * input modifiers are supported by FP operations only
21366 */
21367 assert(!(instData.ABS & 0x1));
21368 assert(!(instData.ABS & 0x2));
21369 assert(!(instData.ABS & 0x4));
21370 assert(!(extData.NEG & 0x1));
21371 assert(!(extData.NEG & 0x2));
21372 assert(!(extData.NEG & 0x4));
21373
21374 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21375 if (wf->execMask(lane)) {
21376 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
21377 }
21378 }
21379
21380 wf->execMask() = sdst.rawData();
21381 sdst.write();
21382 }
21383
21384 Inst_VOP3__V_CMPX_GT_I32::Inst_VOP3__V_CMPX_GT_I32(
21385 InFmt_VOP3 *iFmt)
21386 : Inst_VOP3(iFmt, "v_cmpx_gt_i32", true)
21387 {
21388 setFlag(ALU);
21389 } // Inst_VOP3__V_CMPX_GT_I32
21390
21391 Inst_VOP3__V_CMPX_GT_I32::~Inst_VOP3__V_CMPX_GT_I32()
21392 {
21393 } // ~Inst_VOP3__V_CMPX_GT_I32
21394
21395 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21396 void
21397 Inst_VOP3__V_CMPX_GT_I32::execute(GPUDynInstPtr gpuDynInst)
21398 {
21399 Wavefront *wf = gpuDynInst->wavefront();
21400 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21401 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21402 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21403
21404 src0.readSrc();
21405 src1.readSrc();
21406
21407 /**
21408 * input modifiers are supported by FP operations only
21409 */
21410 assert(!(instData.ABS & 0x1));
21411 assert(!(instData.ABS & 0x2));
21412 assert(!(instData.ABS & 0x4));
21413 assert(!(extData.NEG & 0x1));
21414 assert(!(extData.NEG & 0x2));
21415 assert(!(extData.NEG & 0x4));
21416
21417 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21418 if (wf->execMask(lane)) {
21419 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
21420 }
21421 }
21422
21423 wf->execMask() = sdst.rawData();
21424 sdst.write();
21425 }
21426
21427 Inst_VOP3__V_CMPX_NE_I32::Inst_VOP3__V_CMPX_NE_I32(
21428 InFmt_VOP3 *iFmt)
21429 : Inst_VOP3(iFmt, "v_cmpx_ne_i32", true)
21430 {
21431 setFlag(ALU);
21432 } // Inst_VOP3__V_CMPX_NE_I32
21433
21434 Inst_VOP3__V_CMPX_NE_I32::~Inst_VOP3__V_CMPX_NE_I32()
21435 {
21436 } // ~Inst_VOP3__V_CMPX_NE_I32
21437
21438 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21439 void
21440 Inst_VOP3__V_CMPX_NE_I32::execute(GPUDynInstPtr gpuDynInst)
21441 {
21442 Wavefront *wf = gpuDynInst->wavefront();
21443 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21444 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21445 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21446
21447 src0.readSrc();
21448 src1.readSrc();
21449
21450 /**
21451 * input modifiers are supported by FP operations only
21452 */
21453 assert(!(instData.ABS & 0x1));
21454 assert(!(instData.ABS & 0x2));
21455 assert(!(instData.ABS & 0x4));
21456 assert(!(extData.NEG & 0x1));
21457 assert(!(extData.NEG & 0x2));
21458 assert(!(extData.NEG & 0x4));
21459
21460 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21461 if (wf->execMask(lane)) {
21462 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
21463 }
21464 }
21465
21466 wf->execMask() = sdst.rawData();
21467 sdst.write();
21468 }
21469
21470 Inst_VOP3__V_CMPX_GE_I32::Inst_VOP3__V_CMPX_GE_I32(
21471 InFmt_VOP3 *iFmt)
21472 : Inst_VOP3(iFmt, "v_cmpx_ge_i32", true)
21473 {
21474 setFlag(ALU);
21475 } // Inst_VOP3__V_CMPX_GE_I32
21476
21477 Inst_VOP3__V_CMPX_GE_I32::~Inst_VOP3__V_CMPX_GE_I32()
21478 {
21479 } // ~Inst_VOP3__V_CMPX_GE_I32
21480
21481 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21482 void
21483 Inst_VOP3__V_CMPX_GE_I32::execute(GPUDynInstPtr gpuDynInst)
21484 {
21485 Wavefront *wf = gpuDynInst->wavefront();
21486 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
21487 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
21488 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21489
21490 src0.readSrc();
21491 src1.readSrc();
21492
21493 /**
21494 * input modifiers are supported by FP operations only
21495 */
21496 assert(!(instData.ABS & 0x1));
21497 assert(!(instData.ABS & 0x2));
21498 assert(!(instData.ABS & 0x4));
21499 assert(!(extData.NEG & 0x1));
21500 assert(!(extData.NEG & 0x2));
21501 assert(!(extData.NEG & 0x4));
21502
21503 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21504 if (wf->execMask(lane)) {
21505 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
21506 }
21507 }
21508
21509 wf->execMask() = sdst.rawData();
21510 sdst.write();
21511 }
21512
21513 Inst_VOP3__V_CMPX_T_I32::Inst_VOP3__V_CMPX_T_I32(
21514 InFmt_VOP3 *iFmt)
21515 : Inst_VOP3(iFmt, "v_cmpx_t_i32", true)
21516 {
21517 setFlag(ALU);
21518 } // Inst_VOP3__V_CMPX_T_I32
21519
21520 Inst_VOP3__V_CMPX_T_I32::~Inst_VOP3__V_CMPX_T_I32()
21521 {
21522 } // ~Inst_VOP3__V_CMPX_T_I32
21523
21524 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
21525 void
21526 Inst_VOP3__V_CMPX_T_I32::execute(GPUDynInstPtr gpuDynInst)
21527 {
21528 Wavefront *wf = gpuDynInst->wavefront();
21529 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21530
21531 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21532 if (wf->execMask(lane)) {
21533 sdst.setBit(lane, 1);
21534 }
21535 }
21536
21537 wf->execMask() = sdst.rawData();
21538 sdst.write();
21539 }
21540
21541 Inst_VOP3__V_CMPX_F_U32::Inst_VOP3__V_CMPX_F_U32(
21542 InFmt_VOP3 *iFmt)
21543 : Inst_VOP3(iFmt, "v_cmpx_f_u32", true)
21544 {
21545 setFlag(ALU);
21546 } // Inst_VOP3__V_CMPX_F_U32
21547
21548 Inst_VOP3__V_CMPX_F_U32::~Inst_VOP3__V_CMPX_F_U32()
21549 {
21550 } // ~Inst_VOP3__V_CMPX_F_U32
21551
21552 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
21553 void
21554 Inst_VOP3__V_CMPX_F_U32::execute(GPUDynInstPtr gpuDynInst)
21555 {
21556 Wavefront *wf = gpuDynInst->wavefront();
21557 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21558
21559 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21560 if (wf->execMask(lane)) {
21561 sdst.setBit(lane, 0);
21562 }
21563 }
21564
21565 wf->execMask() = sdst.rawData();
21566 sdst.write();
21567 }
21568
21569 Inst_VOP3__V_CMPX_LT_U32::Inst_VOP3__V_CMPX_LT_U32(
21570 InFmt_VOP3 *iFmt)
21571 : Inst_VOP3(iFmt, "v_cmpx_lt_u32", true)
21572 {
21573 setFlag(ALU);
21574 } // Inst_VOP3__V_CMPX_LT_U32
21575
21576 Inst_VOP3__V_CMPX_LT_U32::~Inst_VOP3__V_CMPX_LT_U32()
21577 {
21578 } // ~Inst_VOP3__V_CMPX_LT_U32
21579
21580 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
21581 void
21582 Inst_VOP3__V_CMPX_LT_U32::execute(GPUDynInstPtr gpuDynInst)
21583 {
21584 Wavefront *wf = gpuDynInst->wavefront();
21585 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21586 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21587 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21588
21589 src0.readSrc();
21590 src1.readSrc();
21591
21592 /**
21593 * input modifiers are supported by FP operations only
21594 */
21595 assert(!(instData.ABS & 0x1));
21596 assert(!(instData.ABS & 0x2));
21597 assert(!(instData.ABS & 0x4));
21598 assert(!(extData.NEG & 0x1));
21599 assert(!(extData.NEG & 0x2));
21600 assert(!(extData.NEG & 0x4));
21601
21602 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21603 if (wf->execMask(lane)) {
21604 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
21605 }
21606 }
21607
21608 wf->execMask() = sdst.rawData();
21609 sdst.write();
21610 }
21611
21612 Inst_VOP3__V_CMPX_EQ_U32::Inst_VOP3__V_CMPX_EQ_U32(
21613 InFmt_VOP3 *iFmt)
21614 : Inst_VOP3(iFmt, "v_cmpx_eq_u32", true)
21615 {
21616 setFlag(ALU);
21617 } // Inst_VOP3__V_CMPX_EQ_U32
21618
21619 Inst_VOP3__V_CMPX_EQ_U32::~Inst_VOP3__V_CMPX_EQ_U32()
21620 {
21621 } // ~Inst_VOP3__V_CMPX_EQ_U32
21622
21623 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21624 void
21625 Inst_VOP3__V_CMPX_EQ_U32::execute(GPUDynInstPtr gpuDynInst)
21626 {
21627 Wavefront *wf = gpuDynInst->wavefront();
21628 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21629 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21630 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21631
21632 src0.readSrc();
21633 src1.readSrc();
21634
21635 /**
21636 * input modifiers are supported by FP operations only
21637 */
21638 assert(!(instData.ABS & 0x1));
21639 assert(!(instData.ABS & 0x2));
21640 assert(!(instData.ABS & 0x4));
21641 assert(!(extData.NEG & 0x1));
21642 assert(!(extData.NEG & 0x2));
21643 assert(!(extData.NEG & 0x4));
21644
21645 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21646 if (wf->execMask(lane)) {
21647 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21648 }
21649 }
21650
21651 wf->execMask() = sdst.rawData();
21652 sdst.write();
21653 }
21654
21655 Inst_VOP3__V_CMPX_LE_U32::Inst_VOP3__V_CMPX_LE_U32(
21656 InFmt_VOP3 *iFmt)
21657 : Inst_VOP3(iFmt, "v_cmpx_le_u32", true)
21658 {
21659 setFlag(ALU);
21660 } // Inst_VOP3__V_CMPX_LE_U32
21661
21662 Inst_VOP3__V_CMPX_LE_U32::~Inst_VOP3__V_CMPX_LE_U32()
21663 {
21664 } // ~Inst_VOP3__V_CMPX_LE_U32
21665
21666 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21667 void
21668 Inst_VOP3__V_CMPX_LE_U32::execute(GPUDynInstPtr gpuDynInst)
21669 {
21670 Wavefront *wf = gpuDynInst->wavefront();
21671 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21672 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21673 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21674
21675 src0.readSrc();
21676 src1.readSrc();
21677
21678 /**
21679 * input modifiers are supported by FP operations only
21680 */
21681 assert(!(instData.ABS & 0x1));
21682 assert(!(instData.ABS & 0x2));
21683 assert(!(instData.ABS & 0x4));
21684 assert(!(extData.NEG & 0x1));
21685 assert(!(extData.NEG & 0x2));
21686 assert(!(extData.NEG & 0x4));
21687
21688 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21689 if (wf->execMask(lane)) {
21690 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
21691 }
21692 }
21693
21694 wf->execMask() = sdst.rawData();
21695 sdst.write();
21696 }
21697
21698 Inst_VOP3__V_CMPX_GT_U32::Inst_VOP3__V_CMPX_GT_U32(
21699 InFmt_VOP3 *iFmt)
21700 : Inst_VOP3(iFmt, "v_cmpx_gt_u32", true)
21701 {
21702 setFlag(ALU);
21703 } // Inst_VOP3__V_CMPX_GT_U32
21704
21705 Inst_VOP3__V_CMPX_GT_U32::~Inst_VOP3__V_CMPX_GT_U32()
21706 {
21707 } // ~Inst_VOP3__V_CMPX_GT_U32
21708
21709 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
21710 void
21711 Inst_VOP3__V_CMPX_GT_U32::execute(GPUDynInstPtr gpuDynInst)
21712 {
21713 Wavefront *wf = gpuDynInst->wavefront();
21714 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21715 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21716 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21717
21718 src0.readSrc();
21719 src1.readSrc();
21720
21721 /**
21722 * input modifiers are supported by FP operations only
21723 */
21724 assert(!(instData.ABS & 0x1));
21725 assert(!(instData.ABS & 0x2));
21726 assert(!(instData.ABS & 0x4));
21727 assert(!(extData.NEG & 0x1));
21728 assert(!(extData.NEG & 0x2));
21729 assert(!(extData.NEG & 0x4));
21730
21731 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21732 if (wf->execMask(lane)) {
21733 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
21734 }
21735 }
21736
21737 wf->execMask() = sdst.rawData();
21738 sdst.write();
21739 }
21740
21741 Inst_VOP3__V_CMPX_NE_U32::Inst_VOP3__V_CMPX_NE_U32(
21742 InFmt_VOP3 *iFmt)
21743 : Inst_VOP3(iFmt, "v_cmpx_ne_u32", true)
21744 {
21745 setFlag(ALU);
21746 } // Inst_VOP3__V_CMPX_NE_U32
21747
21748 Inst_VOP3__V_CMPX_NE_U32::~Inst_VOP3__V_CMPX_NE_U32()
21749 {
21750 } // ~Inst_VOP3__V_CMPX_NE_U32
21751
21752 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
21753 void
21754 Inst_VOP3__V_CMPX_NE_U32::execute(GPUDynInstPtr gpuDynInst)
21755 {
21756 Wavefront *wf = gpuDynInst->wavefront();
21757 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21758 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21759 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21760
21761 src0.readSrc();
21762 src1.readSrc();
21763
21764 /**
21765 * input modifiers are supported by FP operations only
21766 */
21767 assert(!(instData.ABS & 0x1));
21768 assert(!(instData.ABS & 0x2));
21769 assert(!(instData.ABS & 0x4));
21770 assert(!(extData.NEG & 0x1));
21771 assert(!(extData.NEG & 0x2));
21772 assert(!(extData.NEG & 0x4));
21773
21774 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21775 if (wf->execMask(lane)) {
21776 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
21777 }
21778 }
21779
21780 wf->execMask() = sdst.rawData();
21781 sdst.write();
21782 }
21783
21784 Inst_VOP3__V_CMPX_GE_U32::Inst_VOP3__V_CMPX_GE_U32(
21785 InFmt_VOP3 *iFmt)
21786 : Inst_VOP3(iFmt, "v_cmpx_ge_u32", true)
21787 {
21788 setFlag(ALU);
21789 } // Inst_VOP3__V_CMPX_GE_U32
21790
21791 Inst_VOP3__V_CMPX_GE_U32::~Inst_VOP3__V_CMPX_GE_U32()
21792 {
21793 } // ~Inst_VOP3__V_CMPX_GE_U32
21794
21795 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
21796 void
21797 Inst_VOP3__V_CMPX_GE_U32::execute(GPUDynInstPtr gpuDynInst)
21798 {
21799 Wavefront *wf = gpuDynInst->wavefront();
21800 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
21801 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
21802 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21803
21804 src0.readSrc();
21805 src1.readSrc();
21806
21807 /**
21808 * input modifiers are supported by FP operations only
21809 */
21810 assert(!(instData.ABS & 0x1));
21811 assert(!(instData.ABS & 0x2));
21812 assert(!(instData.ABS & 0x4));
21813 assert(!(extData.NEG & 0x1));
21814 assert(!(extData.NEG & 0x2));
21815 assert(!(extData.NEG & 0x4));
21816
21817 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21818 if (wf->execMask(lane)) {
21819 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
21820 }
21821 }
21822
21823 wf->execMask() = sdst.rawData();
21824 sdst.write();
21825 }
21826
21827 Inst_VOP3__V_CMPX_T_U32::Inst_VOP3__V_CMPX_T_U32(
21828 InFmt_VOP3 *iFmt)
21829 : Inst_VOP3(iFmt, "v_cmpx_t_u32", true)
21830 {
21831 setFlag(ALU);
21832 } // Inst_VOP3__V_CMPX_T_U32
21833
21834 Inst_VOP3__V_CMPX_T_U32::~Inst_VOP3__V_CMPX_T_U32()
21835 {
21836 } // ~Inst_VOP3__V_CMPX_T_U32
21837
21838 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
21839 void
21840 Inst_VOP3__V_CMPX_T_U32::execute(GPUDynInstPtr gpuDynInst)
21841 {
21842 Wavefront *wf = gpuDynInst->wavefront();
21843 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21844
21845 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21846 if (wf->execMask(lane)) {
21847 sdst.setBit(lane, 1);
21848 }
21849 }
21850
21851 wf->execMask() = sdst.rawData();
21852 sdst.write();
21853 }
21854
21855 Inst_VOP3__V_CMP_F_I64::Inst_VOP3__V_CMP_F_I64(InFmt_VOP3 *iFmt)
21856 : Inst_VOP3(iFmt, "v_cmp_f_i64", true)
21857 {
21858 setFlag(ALU);
21859 } // Inst_VOP3__V_CMP_F_I64
21860
21861 Inst_VOP3__V_CMP_F_I64::~Inst_VOP3__V_CMP_F_I64()
21862 {
21863 } // ~Inst_VOP3__V_CMP_F_I64
21864
21865 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
21866 void
21867 Inst_VOP3__V_CMP_F_I64::execute(GPUDynInstPtr gpuDynInst)
21868 {
21869 Wavefront *wf = gpuDynInst->wavefront();
21870 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21871
21872 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21873 if (wf->execMask(lane)) {
21874 sdst.setBit(lane, 0);
21875 }
21876 }
21877
21878 sdst.write();
21879 }
21880
21881 Inst_VOP3__V_CMP_LT_I64::Inst_VOP3__V_CMP_LT_I64(
21882 InFmt_VOP3 *iFmt)
21883 : Inst_VOP3(iFmt, "v_cmp_lt_i64", true)
21884 {
21885 setFlag(ALU);
21886 } // Inst_VOP3__V_CMP_LT_I64
21887
21888 Inst_VOP3__V_CMP_LT_I64::~Inst_VOP3__V_CMP_LT_I64()
21889 {
21890 } // ~Inst_VOP3__V_CMP_LT_I64
21891
21892 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
21893 void
21894 Inst_VOP3__V_CMP_LT_I64::execute(GPUDynInstPtr gpuDynInst)
21895 {
21896 Wavefront *wf = gpuDynInst->wavefront();
21897 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
21898 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
21899 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21900
21901 src0.readSrc();
21902 src1.readSrc();
21903
21904 /**
21905 * input modifiers are supported by FP operations only
21906 */
21907 assert(!(instData.ABS & 0x1));
21908 assert(!(instData.ABS & 0x2));
21909 assert(!(instData.ABS & 0x4));
21910 assert(!(extData.NEG & 0x1));
21911 assert(!(extData.NEG & 0x2));
21912 assert(!(extData.NEG & 0x4));
21913
21914 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21915 if (wf->execMask(lane)) {
21916 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
21917 }
21918 }
21919
21920 sdst.write();
21921 }
21922
21923 Inst_VOP3__V_CMP_EQ_I64::Inst_VOP3__V_CMP_EQ_I64(
21924 InFmt_VOP3 *iFmt)
21925 : Inst_VOP3(iFmt, "v_cmp_eq_i64", true)
21926 {
21927 setFlag(ALU);
21928 } // Inst_VOP3__V_CMP_EQ_I64
21929
21930 Inst_VOP3__V_CMP_EQ_I64::~Inst_VOP3__V_CMP_EQ_I64()
21931 {
21932 } // ~Inst_VOP3__V_CMP_EQ_I64
21933
21934 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
21935 void
21936 Inst_VOP3__V_CMP_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
21937 {
21938 Wavefront *wf = gpuDynInst->wavefront();
21939 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
21940 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
21941 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21942
21943 src0.readSrc();
21944 src1.readSrc();
21945
21946 /**
21947 * input modifiers are supported by FP operations only
21948 */
21949 assert(!(instData.ABS & 0x1));
21950 assert(!(instData.ABS & 0x2));
21951 assert(!(instData.ABS & 0x4));
21952 assert(!(extData.NEG & 0x1));
21953 assert(!(extData.NEG & 0x2));
21954 assert(!(extData.NEG & 0x4));
21955
21956 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21957 if (wf->execMask(lane)) {
21958 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
21959 }
21960 }
21961
21962 sdst.write();
21963 }
21964
21965 Inst_VOP3__V_CMP_LE_I64::Inst_VOP3__V_CMP_LE_I64(
21966 InFmt_VOP3 *iFmt)
21967 : Inst_VOP3(iFmt, "v_cmp_le_i64", true)
21968 {
21969 setFlag(ALU);
21970 } // Inst_VOP3__V_CMP_LE_I64
21971
21972 Inst_VOP3__V_CMP_LE_I64::~Inst_VOP3__V_CMP_LE_I64()
21973 {
21974 } // ~Inst_VOP3__V_CMP_LE_I64
21975
21976 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
21977 void
21978 Inst_VOP3__V_CMP_LE_I64::execute(GPUDynInstPtr gpuDynInst)
21979 {
21980 Wavefront *wf = gpuDynInst->wavefront();
21981 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
21982 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
21983 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
21984
21985 src0.readSrc();
21986 src1.readSrc();
21987
21988 /**
21989 * input modifiers are supported by FP operations only
21990 */
21991 assert(!(instData.ABS & 0x1));
21992 assert(!(instData.ABS & 0x2));
21993 assert(!(instData.ABS & 0x4));
21994 assert(!(extData.NEG & 0x1));
21995 assert(!(extData.NEG & 0x2));
21996 assert(!(extData.NEG & 0x4));
21997
21998 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
21999 if (wf->execMask(lane)) {
22000 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22001 }
22002 }
22003
22004 sdst.write();
22005 }
22006
22007 Inst_VOP3__V_CMP_GT_I64::Inst_VOP3__V_CMP_GT_I64(
22008 InFmt_VOP3 *iFmt)
22009 : Inst_VOP3(iFmt, "v_cmp_gt_i64", true)
22010 {
22011 setFlag(ALU);
22012 } // Inst_VOP3__V_CMP_GT_I64
22013
22014 Inst_VOP3__V_CMP_GT_I64::~Inst_VOP3__V_CMP_GT_I64()
22015 {
22016 } // ~Inst_VOP3__V_CMP_GT_I64
22017
22018 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22019 void
22020 Inst_VOP3__V_CMP_GT_I64::execute(GPUDynInstPtr gpuDynInst)
22021 {
22022 Wavefront *wf = gpuDynInst->wavefront();
22023 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22024 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22025 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22026
22027 src0.readSrc();
22028 src1.readSrc();
22029
22030 /**
22031 * input modifiers are supported by FP operations only
22032 */
22033 assert(!(instData.ABS & 0x1));
22034 assert(!(instData.ABS & 0x2));
22035 assert(!(instData.ABS & 0x4));
22036 assert(!(extData.NEG & 0x1));
22037 assert(!(extData.NEG & 0x2));
22038 assert(!(extData.NEG & 0x4));
22039
22040 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22041 if (wf->execMask(lane)) {
22042 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22043 }
22044 }
22045
22046 sdst.write();
22047 }
22048
22049 Inst_VOP3__V_CMP_NE_I64::Inst_VOP3__V_CMP_NE_I64(
22050 InFmt_VOP3 *iFmt)
22051 : Inst_VOP3(iFmt, "v_cmp_ne_i64", true)
22052 {
22053 setFlag(ALU);
22054 } // Inst_VOP3__V_CMP_NE_I64
22055
22056 Inst_VOP3__V_CMP_NE_I64::~Inst_VOP3__V_CMP_NE_I64()
22057 {
22058 } // ~Inst_VOP3__V_CMP_NE_I64
22059
22060 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22061 void
22062 Inst_VOP3__V_CMP_NE_I64::execute(GPUDynInstPtr gpuDynInst)
22063 {
22064 Wavefront *wf = gpuDynInst->wavefront();
22065 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22066 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22067 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22068
22069 src0.readSrc();
22070 src1.readSrc();
22071
22072 /**
22073 * input modifiers are supported by FP operations only
22074 */
22075 assert(!(instData.ABS & 0x1));
22076 assert(!(instData.ABS & 0x2));
22077 assert(!(instData.ABS & 0x4));
22078 assert(!(extData.NEG & 0x1));
22079 assert(!(extData.NEG & 0x2));
22080 assert(!(extData.NEG & 0x4));
22081
22082 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22083 if (wf->execMask(lane)) {
22084 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
22085 }
22086 }
22087
22088 sdst.write();
22089 }
22090
22091 Inst_VOP3__V_CMP_GE_I64::Inst_VOP3__V_CMP_GE_I64(
22092 InFmt_VOP3 *iFmt)
22093 : Inst_VOP3(iFmt, "v_cmp_ge_i64", true)
22094 {
22095 setFlag(ALU);
22096 } // Inst_VOP3__V_CMP_GE_I64
22097
22098 Inst_VOP3__V_CMP_GE_I64::~Inst_VOP3__V_CMP_GE_I64()
22099 {
22100 } // ~Inst_VOP3__V_CMP_GE_I64
22101
22102 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
22103 void
22104 Inst_VOP3__V_CMP_GE_I64::execute(GPUDynInstPtr gpuDynInst)
22105 {
22106 Wavefront *wf = gpuDynInst->wavefront();
22107 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22108 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22109 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22110
22111 src0.readSrc();
22112 src1.readSrc();
22113
22114 /**
22115 * input modifiers are supported by FP operations only
22116 */
22117 assert(!(instData.ABS & 0x1));
22118 assert(!(instData.ABS & 0x2));
22119 assert(!(instData.ABS & 0x4));
22120 assert(!(extData.NEG & 0x1));
22121 assert(!(extData.NEG & 0x2));
22122 assert(!(extData.NEG & 0x4));
22123
22124 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22125 if (wf->execMask(lane)) {
22126 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
22127 }
22128 }
22129
22130 sdst.write();
22131 }
22132
22133 Inst_VOP3__V_CMP_T_I64::Inst_VOP3__V_CMP_T_I64(InFmt_VOP3 *iFmt)
22134 : Inst_VOP3(iFmt, "v_cmp_t_i64", true)
22135 {
22136 setFlag(ALU);
22137 } // Inst_VOP3__V_CMP_T_I64
22138
22139 Inst_VOP3__V_CMP_T_I64::~Inst_VOP3__V_CMP_T_I64()
22140 {
22141 } // ~Inst_VOP3__V_CMP_T_I64
22142
22143 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
22144 void
22145 Inst_VOP3__V_CMP_T_I64::execute(GPUDynInstPtr gpuDynInst)
22146 {
22147 Wavefront *wf = gpuDynInst->wavefront();
22148 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22149
22150 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22151 if (wf->execMask(lane)) {
22152 sdst.setBit(lane, 1);
22153 }
22154 }
22155
22156 sdst.write();
22157 }
22158
22159 Inst_VOP3__V_CMP_F_U64::Inst_VOP3__V_CMP_F_U64(InFmt_VOP3 *iFmt)
22160 : Inst_VOP3(iFmt, "v_cmp_f_u64", true)
22161 {
22162 setFlag(ALU);
22163 } // Inst_VOP3__V_CMP_F_U64
22164
22165 Inst_VOP3__V_CMP_F_U64::~Inst_VOP3__V_CMP_F_U64()
22166 {
22167 } // ~Inst_VOP3__V_CMP_F_U64
22168
22169 // D.u64[threadID] = 0; D = VCC in VOPC encoding.
22170 void
22171 Inst_VOP3__V_CMP_F_U64::execute(GPUDynInstPtr gpuDynInst)
22172 {
22173 Wavefront *wf = gpuDynInst->wavefront();
22174 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22175
22176 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22177 if (wf->execMask(lane)) {
22178 sdst.setBit(lane, 0);
22179 }
22180 }
22181
22182 sdst.write();
22183 }
22184
22185 Inst_VOP3__V_CMP_LT_U64::Inst_VOP3__V_CMP_LT_U64(
22186 InFmt_VOP3 *iFmt)
22187 : Inst_VOP3(iFmt, "v_cmp_lt_u64", true)
22188 {
22189 setFlag(ALU);
22190 } // Inst_VOP3__V_CMP_LT_U64
22191
22192 Inst_VOP3__V_CMP_LT_U64::~Inst_VOP3__V_CMP_LT_U64()
22193 {
22194 } // ~Inst_VOP3__V_CMP_LT_U64
22195
22196 // D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
22197 void
22198 Inst_VOP3__V_CMP_LT_U64::execute(GPUDynInstPtr gpuDynInst)
22199 {
22200 Wavefront *wf = gpuDynInst->wavefront();
22201 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22202 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22203 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22204
22205 src0.readSrc();
22206 src1.readSrc();
22207
22208 /**
22209 * input modifiers are supported by FP operations only
22210 */
22211 assert(!(instData.ABS & 0x1));
22212 assert(!(instData.ABS & 0x2));
22213 assert(!(instData.ABS & 0x4));
22214 assert(!(extData.NEG & 0x1));
22215 assert(!(extData.NEG & 0x2));
22216 assert(!(extData.NEG & 0x4));
22217
22218 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22219 if (wf->execMask(lane)) {
22220 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
22221 }
22222 }
22223
22224 sdst.write();
22225 }
22226
22227 Inst_VOP3__V_CMP_EQ_U64::Inst_VOP3__V_CMP_EQ_U64(
22228 InFmt_VOP3 *iFmt)
22229 : Inst_VOP3(iFmt, "v_cmp_eq_u64", true)
22230 {
22231 setFlag(ALU);
22232 } // Inst_VOP3__V_CMP_EQ_U64
22233
22234 Inst_VOP3__V_CMP_EQ_U64::~Inst_VOP3__V_CMP_EQ_U64()
22235 {
22236 } // ~Inst_VOP3__V_CMP_EQ_U64
22237
22238 // D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
22239 void
22240 Inst_VOP3__V_CMP_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
22241 {
22242 Wavefront *wf = gpuDynInst->wavefront();
22243 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22244 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22245 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22246
22247 src0.readSrc();
22248 src1.readSrc();
22249
22250 /**
22251 * input modifiers are supported by FP operations only
22252 */
22253 assert(!(instData.ABS & 0x1));
22254 assert(!(instData.ABS & 0x2));
22255 assert(!(instData.ABS & 0x4));
22256 assert(!(extData.NEG & 0x1));
22257 assert(!(extData.NEG & 0x2));
22258 assert(!(extData.NEG & 0x4));
22259
22260 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22261 if (wf->execMask(lane)) {
22262 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
22263 }
22264 }
22265
22266 sdst.write();
22267 }
22268
22269 Inst_VOP3__V_CMP_LE_U64::Inst_VOP3__V_CMP_LE_U64(
22270 InFmt_VOP3 *iFmt)
22271 : Inst_VOP3(iFmt, "v_cmp_le_u64", true)
22272 {
22273 setFlag(ALU);
22274 } // Inst_VOP3__V_CMP_LE_U64
22275
22276 Inst_VOP3__V_CMP_LE_U64::~Inst_VOP3__V_CMP_LE_U64()
22277 {
22278 } // ~Inst_VOP3__V_CMP_LE_U64
22279
22280 // D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
22281 void
22282 Inst_VOP3__V_CMP_LE_U64::execute(GPUDynInstPtr gpuDynInst)
22283 {
22284 Wavefront *wf = gpuDynInst->wavefront();
22285 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22286 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22287 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22288
22289 src0.readSrc();
22290 src1.readSrc();
22291
22292 /**
22293 * input modifiers are supported by FP operations only
22294 */
22295 assert(!(instData.ABS & 0x1));
22296 assert(!(instData.ABS & 0x2));
22297 assert(!(instData.ABS & 0x4));
22298 assert(!(extData.NEG & 0x1));
22299 assert(!(extData.NEG & 0x2));
22300 assert(!(extData.NEG & 0x4));
22301
22302 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22303 if (wf->execMask(lane)) {
22304 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22305 }
22306 }
22307
22308 sdst.write();
22309 }
22310
22311 Inst_VOP3__V_CMP_GT_U64::Inst_VOP3__V_CMP_GT_U64(
22312 InFmt_VOP3 *iFmt)
22313 : Inst_VOP3(iFmt, "v_cmp_gt_u64", true)
22314 {
22315 setFlag(ALU);
22316 } // Inst_VOP3__V_CMP_GT_U64
22317
22318 Inst_VOP3__V_CMP_GT_U64::~Inst_VOP3__V_CMP_GT_U64()
22319 {
22320 } // ~Inst_VOP3__V_CMP_GT_U64
22321
22322 // D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22323 void
22324 Inst_VOP3__V_CMP_GT_U64::execute(GPUDynInstPtr gpuDynInst)
22325 {
22326 Wavefront *wf = gpuDynInst->wavefront();
22327 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22328 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22329 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22330
22331 src0.readSrc();
22332 src1.readSrc();
22333
22334 /**
22335 * input modifiers are supported by FP operations only
22336 */
22337 assert(!(instData.ABS & 0x1));
22338 assert(!(instData.ABS & 0x2));
22339 assert(!(instData.ABS & 0x4));
22340 assert(!(extData.NEG & 0x1));
22341 assert(!(extData.NEG & 0x2));
22342 assert(!(extData.NEG & 0x4));
22343
22344 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22345 if (wf->execMask(lane)) {
22346 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22347 }
22348 }
22349
22350 sdst.write();
22351 }
22352
22353 Inst_VOP3__V_CMP_NE_U64::Inst_VOP3__V_CMP_NE_U64(
22354 InFmt_VOP3 *iFmt)
22355 : Inst_VOP3(iFmt, "v_cmp_ne_u64", true)
22356 {
22357 setFlag(ALU);
22358 } // Inst_VOP3__V_CMP_NE_U64
22359
22360 Inst_VOP3__V_CMP_NE_U64::~Inst_VOP3__V_CMP_NE_U64()
22361 {
22362 } // ~Inst_VOP3__V_CMP_NE_U64
22363
22364 // D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22365 void
22366 Inst_VOP3__V_CMP_NE_U64::execute(GPUDynInstPtr gpuDynInst)
22367 {
22368 Wavefront *wf = gpuDynInst->wavefront();
22369 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22370 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22371 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22372
22373 src0.readSrc();
22374 src1.readSrc();
22375
22376 /**
22377 * input modifiers are supported by FP operations only
22378 */
22379 assert(!(instData.ABS & 0x1));
22380 assert(!(instData.ABS & 0x2));
22381 assert(!(instData.ABS & 0x4));
22382 assert(!(extData.NEG & 0x1));
22383 assert(!(extData.NEG & 0x2));
22384 assert(!(extData.NEG & 0x4));
22385
22386 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22387 if (wf->execMask(lane)) {
22388 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
22389 }
22390 }
22391
22392 sdst.write();
22393 }
22394
22395 Inst_VOP3__V_CMP_GE_U64::Inst_VOP3__V_CMP_GE_U64(
22396 InFmt_VOP3 *iFmt)
22397 : Inst_VOP3(iFmt, "v_cmp_ge_u64", true)
22398 {
22399 setFlag(ALU);
22400 } // Inst_VOP3__V_CMP_GE_U64
22401
22402 Inst_VOP3__V_CMP_GE_U64::~Inst_VOP3__V_CMP_GE_U64()
22403 {
22404 } // ~Inst_VOP3__V_CMP_GE_U64
22405
22406 // D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
22407 void
22408 Inst_VOP3__V_CMP_GE_U64::execute(GPUDynInstPtr gpuDynInst)
22409 {
22410 Wavefront *wf = gpuDynInst->wavefront();
22411 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22412 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22413 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22414
22415 src0.readSrc();
22416 src1.readSrc();
22417
22418 /**
22419 * input modifiers are supported by FP operations only
22420 */
22421 assert(!(instData.ABS & 0x1));
22422 assert(!(instData.ABS & 0x2));
22423 assert(!(instData.ABS & 0x4));
22424 assert(!(extData.NEG & 0x1));
22425 assert(!(extData.NEG & 0x2));
22426 assert(!(extData.NEG & 0x4));
22427
22428 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22429 if (wf->execMask(lane)) {
22430 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
22431 }
22432 }
22433
22434 sdst.write();
22435 }
22436
22437 Inst_VOP3__V_CMP_T_U64::Inst_VOP3__V_CMP_T_U64(InFmt_VOP3 *iFmt)
22438 : Inst_VOP3(iFmt, "v_cmp_t_u64", true)
22439 {
22440 setFlag(ALU);
22441 } // Inst_VOP3__V_CMP_T_U64
22442
22443 Inst_VOP3__V_CMP_T_U64::~Inst_VOP3__V_CMP_T_U64()
22444 {
22445 } // ~Inst_VOP3__V_CMP_T_U64
22446
22447 // D.u64[threadID] = 1; D = VCC in VOPC encoding.
22448 void
22449 Inst_VOP3__V_CMP_T_U64::execute(GPUDynInstPtr gpuDynInst)
22450 {
22451 Wavefront *wf = gpuDynInst->wavefront();
22452 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22453
22454 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22455 if (wf->execMask(lane)) {
22456 sdst.setBit(lane, 1);
22457 }
22458 }
22459
22460 sdst.write();
22461 }
22462
22463 Inst_VOP3__V_CMPX_F_I64::Inst_VOP3__V_CMPX_F_I64(
22464 InFmt_VOP3 *iFmt)
22465 : Inst_VOP3(iFmt, "v_cmpx_f_i64", true)
22466 {
22467 setFlag(ALU);
22468 } // Inst_VOP3__V_CMPX_F_I64
22469
22470 Inst_VOP3__V_CMPX_F_I64::~Inst_VOP3__V_CMPX_F_I64()
22471 {
22472 } // ~Inst_VOP3__V_CMPX_F_I64
22473
22474 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
22475 void
22476 Inst_VOP3__V_CMPX_F_I64::execute(GPUDynInstPtr gpuDynInst)
22477 {
22478 Wavefront *wf = gpuDynInst->wavefront();
22479 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22480
22481 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22482 if (wf->execMask(lane)) {
22483 sdst.setBit(lane, 0);
22484 }
22485 }
22486
22487 wf->execMask() = sdst.rawData();
22488 sdst.write();
22489 }
22490
22491 Inst_VOP3__V_CMPX_LT_I64::Inst_VOP3__V_CMPX_LT_I64(
22492 InFmt_VOP3 *iFmt)
22493 : Inst_VOP3(iFmt, "v_cmpx_lt_i64", true)
22494 {
22495 setFlag(ALU);
22496 } // Inst_VOP3__V_CMPX_LT_I64
22497
22498 Inst_VOP3__V_CMPX_LT_I64::~Inst_VOP3__V_CMPX_LT_I64()
22499 {
22500 } // ~Inst_VOP3__V_CMPX_LT_I64
22501
22502 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
22503 void
22504 Inst_VOP3__V_CMPX_LT_I64::execute(GPUDynInstPtr gpuDynInst)
22505 {
22506 Wavefront *wf = gpuDynInst->wavefront();
22507 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22508 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22509 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22510
22511 src0.readSrc();
22512 src1.readSrc();
22513
22514 /**
22515 * input modifiers are supported by FP operations only
22516 */
22517 assert(!(instData.ABS & 0x1));
22518 assert(!(instData.ABS & 0x2));
22519 assert(!(instData.ABS & 0x4));
22520 assert(!(extData.NEG & 0x1));
22521 assert(!(extData.NEG & 0x2));
22522 assert(!(extData.NEG & 0x4));
22523
22524 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22525 if (wf->execMask(lane)) {
22526 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
22527 }
22528 }
22529
22530 wf->execMask() = sdst.rawData();
22531 sdst.write();
22532 }
22533
22534 Inst_VOP3__V_CMPX_EQ_I64::Inst_VOP3__V_CMPX_EQ_I64(
22535 InFmt_VOP3 *iFmt)
22536 : Inst_VOP3(iFmt, "v_cmpx_eq_i64", true)
22537 {
22538 setFlag(ALU);
22539 } // Inst_VOP3__V_CMPX_EQ_I64
22540
22541 Inst_VOP3__V_CMPX_EQ_I64::~Inst_VOP3__V_CMPX_EQ_I64()
22542 {
22543 } // ~Inst_VOP3__V_CMPX_EQ_I64
22544
22545 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
22546 void
22547 Inst_VOP3__V_CMPX_EQ_I64::execute(GPUDynInstPtr gpuDynInst)
22548 {
22549 Wavefront *wf = gpuDynInst->wavefront();
22550 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22551 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22552 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22553
22554 src0.readSrc();
22555 src1.readSrc();
22556
22557 /**
22558 * input modifiers are supported by FP operations only
22559 */
22560 assert(!(instData.ABS & 0x1));
22561 assert(!(instData.ABS & 0x2));
22562 assert(!(instData.ABS & 0x4));
22563 assert(!(extData.NEG & 0x1));
22564 assert(!(extData.NEG & 0x2));
22565 assert(!(extData.NEG & 0x4));
22566
22567 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22568 if (wf->execMask(lane)) {
22569 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
22570 }
22571 }
22572
22573 wf->execMask() = sdst.rawData();
22574 sdst.write();
22575 }
22576
22577 Inst_VOP3__V_CMPX_LE_I64::Inst_VOP3__V_CMPX_LE_I64(
22578 InFmt_VOP3 *iFmt)
22579 : Inst_VOP3(iFmt, "v_cmpx_le_i64", true)
22580 {
22581 setFlag(ALU);
22582 } // Inst_VOP3__V_CMPX_LE_I64
22583
22584 Inst_VOP3__V_CMPX_LE_I64::~Inst_VOP3__V_CMPX_LE_I64()
22585 {
22586 } // ~Inst_VOP3__V_CMPX_LE_I64
22587
22588 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
22589 void
22590 Inst_VOP3__V_CMPX_LE_I64::execute(GPUDynInstPtr gpuDynInst)
22591 {
22592 Wavefront *wf = gpuDynInst->wavefront();
22593 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22594 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22595 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22596
22597 src0.readSrc();
22598 src1.readSrc();
22599
22600 /**
22601 * input modifiers are supported by FP operations only
22602 */
22603 assert(!(instData.ABS & 0x1));
22604 assert(!(instData.ABS & 0x2));
22605 assert(!(instData.ABS & 0x4));
22606 assert(!(extData.NEG & 0x1));
22607 assert(!(extData.NEG & 0x2));
22608 assert(!(extData.NEG & 0x4));
22609
22610 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22611 if (wf->execMask(lane)) {
22612 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22613 }
22614 }
22615
22616 wf->execMask() = sdst.rawData();
22617 sdst.write();
22618 }
22619
22620 Inst_VOP3__V_CMPX_GT_I64::Inst_VOP3__V_CMPX_GT_I64(
22621 InFmt_VOP3 *iFmt)
22622 : Inst_VOP3(iFmt, "v_cmpx_gt_i64", true)
22623 {
22624 setFlag(ALU);
22625 } // Inst_VOP3__V_CMPX_GT_I64
22626
22627 Inst_VOP3__V_CMPX_GT_I64::~Inst_VOP3__V_CMPX_GT_I64()
22628 {
22629 } // ~Inst_VOP3__V_CMPX_GT_I64
22630
22631 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22632 void
22633 Inst_VOP3__V_CMPX_GT_I64::execute(GPUDynInstPtr gpuDynInst)
22634 {
22635 Wavefront *wf = gpuDynInst->wavefront();
22636 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22637 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22638 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22639
22640 src0.readSrc();
22641 src1.readSrc();
22642
22643 /**
22644 * input modifiers are supported by FP operations only
22645 */
22646 assert(!(instData.ABS & 0x1));
22647 assert(!(instData.ABS & 0x2));
22648 assert(!(instData.ABS & 0x4));
22649 assert(!(extData.NEG & 0x1));
22650 assert(!(extData.NEG & 0x2));
22651 assert(!(extData.NEG & 0x4));
22652
22653 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22654 if (wf->execMask(lane)) {
22655 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22656 }
22657 }
22658
22659 wf->execMask() = sdst.rawData();
22660 sdst.write();
22661 }
22662
22663 Inst_VOP3__V_CMPX_NE_I64::Inst_VOP3__V_CMPX_NE_I64(
22664 InFmt_VOP3 *iFmt)
22665 : Inst_VOP3(iFmt, "v_cmpx_ne_i64", true)
22666 {
22667 setFlag(ALU);
22668 } // Inst_VOP3__V_CMPX_NE_I64
22669
22670 Inst_VOP3__V_CMPX_NE_I64::~Inst_VOP3__V_CMPX_NE_I64()
22671 {
22672 } // ~Inst_VOP3__V_CMPX_NE_I64
22673
22674 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22675 void
22676 Inst_VOP3__V_CMPX_NE_I64::execute(GPUDynInstPtr gpuDynInst)
22677 {
22678 Wavefront *wf = gpuDynInst->wavefront();
22679 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22680 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22681 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22682
22683 src0.readSrc();
22684 src1.readSrc();
22685
22686 /**
22687 * input modifiers are supported by FP operations only
22688 */
22689 assert(!(instData.ABS & 0x1));
22690 assert(!(instData.ABS & 0x2));
22691 assert(!(instData.ABS & 0x4));
22692 assert(!(extData.NEG & 0x1));
22693 assert(!(extData.NEG & 0x2));
22694 assert(!(extData.NEG & 0x4));
22695
22696 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22697 if (wf->execMask(lane)) {
22698 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
22699 }
22700 }
22701
22702 wf->execMask() = sdst.rawData();
22703 sdst.write();
22704 }
22705
22706 Inst_VOP3__V_CMPX_GE_I64::Inst_VOP3__V_CMPX_GE_I64(
22707 InFmt_VOP3 *iFmt)
22708 : Inst_VOP3(iFmt, "v_cmpx_ge_i64", true)
22709 {
22710 setFlag(ALU);
22711 } // Inst_VOP3__V_CMPX_GE_I64
22712
22713 Inst_VOP3__V_CMPX_GE_I64::~Inst_VOP3__V_CMPX_GE_I64()
22714 {
22715 } // ~Inst_VOP3__V_CMPX_GE_I64
22716
22717 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
22718 void
22719 Inst_VOP3__V_CMPX_GE_I64::execute(GPUDynInstPtr gpuDynInst)
22720 {
22721 Wavefront *wf = gpuDynInst->wavefront();
22722 ConstVecOperandI64 src0(gpuDynInst, extData.SRC0);
22723 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
22724 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22725
22726 src0.readSrc();
22727 src1.readSrc();
22728
22729 /**
22730 * input modifiers are supported by FP operations only
22731 */
22732 assert(!(instData.ABS & 0x1));
22733 assert(!(instData.ABS & 0x2));
22734 assert(!(instData.ABS & 0x4));
22735 assert(!(extData.NEG & 0x1));
22736 assert(!(extData.NEG & 0x2));
22737 assert(!(extData.NEG & 0x4));
22738
22739 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22740 if (wf->execMask(lane)) {
22741 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
22742 }
22743 }
22744
22745 wf->execMask() = sdst.rawData();
22746 sdst.write();
22747 }
22748
22749 Inst_VOP3__V_CMPX_T_I64::Inst_VOP3__V_CMPX_T_I64(
22750 InFmt_VOP3 *iFmt)
22751 : Inst_VOP3(iFmt, "v_cmpx_t_i64", true)
22752 {
22753 setFlag(ALU);
22754 } // Inst_VOP3__V_CMPX_T_I64
22755
22756 Inst_VOP3__V_CMPX_T_I64::~Inst_VOP3__V_CMPX_T_I64()
22757 {
22758 } // ~Inst_VOP3__V_CMPX_T_I64
22759
22760 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
22761 void
22762 Inst_VOP3__V_CMPX_T_I64::execute(GPUDynInstPtr gpuDynInst)
22763 {
22764 Wavefront *wf = gpuDynInst->wavefront();
22765 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22766
22767 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22768 if (wf->execMask(lane)) {
22769 sdst.setBit(lane, 1);
22770 }
22771 }
22772
22773 wf->execMask() = sdst.rawData();
22774 sdst.write();
22775 }
22776
22777 Inst_VOP3__V_CMPX_F_U64::Inst_VOP3__V_CMPX_F_U64(
22778 InFmt_VOP3 *iFmt)
22779 : Inst_VOP3(iFmt, "v_cmpx_f_u64", true)
22780 {
22781 setFlag(ALU);
22782 } // Inst_VOP3__V_CMPX_F_U64
22783
22784 Inst_VOP3__V_CMPX_F_U64::~Inst_VOP3__V_CMPX_F_U64()
22785 {
22786 } // ~Inst_VOP3__V_CMPX_F_U64
22787
22788 // EXEC,D.u64[threadID] = 0; D = VCC in VOPC encoding.
22789 void
22790 Inst_VOP3__V_CMPX_F_U64::execute(GPUDynInstPtr gpuDynInst)
22791 {
22792 Wavefront *wf = gpuDynInst->wavefront();
22793 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22794
22795 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22796 if (wf->execMask(lane)) {
22797 sdst.setBit(lane, 0);
22798 }
22799 }
22800
22801 wf->execMask() = sdst.rawData();
22802 sdst.write();
22803 }
22804
22805 Inst_VOP3__V_CMPX_LT_U64::Inst_VOP3__V_CMPX_LT_U64(
22806 InFmt_VOP3 *iFmt)
22807 : Inst_VOP3(iFmt, "v_cmpx_lt_u64", true)
22808 {
22809 setFlag(ALU);
22810 } // Inst_VOP3__V_CMPX_LT_U64
22811
22812 Inst_VOP3__V_CMPX_LT_U64::~Inst_VOP3__V_CMPX_LT_U64()
22813 {
22814 } // ~Inst_VOP3__V_CMPX_LT_U64
22815
22816 // EXEC,D.u64[threadID] = (S0 < S1); D = VCC in VOPC encoding.
22817 void
22818 Inst_VOP3__V_CMPX_LT_U64::execute(GPUDynInstPtr gpuDynInst)
22819 {
22820 Wavefront *wf = gpuDynInst->wavefront();
22821 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22822 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22823 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22824
22825 src0.readSrc();
22826 src1.readSrc();
22827
22828 /**
22829 * input modifiers are supported by FP operations only
22830 */
22831 assert(!(instData.ABS & 0x1));
22832 assert(!(instData.ABS & 0x2));
22833 assert(!(instData.ABS & 0x4));
22834 assert(!(extData.NEG & 0x1));
22835 assert(!(extData.NEG & 0x2));
22836 assert(!(extData.NEG & 0x4));
22837
22838 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22839 if (wf->execMask(lane)) {
22840 sdst.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
22841 }
22842 }
22843
22844 wf->execMask() = sdst.rawData();
22845 sdst.write();
22846 }
22847
22848 Inst_VOP3__V_CMPX_EQ_U64::Inst_VOP3__V_CMPX_EQ_U64(
22849 InFmt_VOP3 *iFmt)
22850 : Inst_VOP3(iFmt, "v_cmpx_eq_u64", true)
22851 {
22852 setFlag(ALU);
22853 } // Inst_VOP3__V_CMPX_EQ_U64
22854
22855 Inst_VOP3__V_CMPX_EQ_U64::~Inst_VOP3__V_CMPX_EQ_U64()
22856 {
22857 } // ~Inst_VOP3__V_CMPX_EQ_U64
22858
22859 // EXEC,D.u64[threadID] = (S0 == S1); D = VCC in VOPC encoding.
22860 void
22861 Inst_VOP3__V_CMPX_EQ_U64::execute(GPUDynInstPtr gpuDynInst)
22862 {
22863 Wavefront *wf = gpuDynInst->wavefront();
22864 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22865 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22866 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22867
22868 src0.readSrc();
22869 src1.readSrc();
22870
22871 /**
22872 * input modifiers are supported by FP operations only
22873 */
22874 assert(!(instData.ABS & 0x1));
22875 assert(!(instData.ABS & 0x2));
22876 assert(!(instData.ABS & 0x4));
22877 assert(!(extData.NEG & 0x1));
22878 assert(!(extData.NEG & 0x2));
22879 assert(!(extData.NEG & 0x4));
22880
22881 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22882 if (wf->execMask(lane)) {
22883 sdst.setBit(lane, src0[lane] == src1[lane] ? 1 : 0);
22884 }
22885 }
22886
22887 wf->execMask() = sdst.rawData();
22888 sdst.write();
22889 }
22890
22891 Inst_VOP3__V_CMPX_LE_U64::Inst_VOP3__V_CMPX_LE_U64(
22892 InFmt_VOP3 *iFmt)
22893 : Inst_VOP3(iFmt, "v_cmpx_le_u64", true)
22894 {
22895 setFlag(ALU);
22896 } // Inst_VOP3__V_CMPX_LE_U64
22897
22898 Inst_VOP3__V_CMPX_LE_U64::~Inst_VOP3__V_CMPX_LE_U64()
22899 {
22900 } // ~Inst_VOP3__V_CMPX_LE_U64
22901
22902 // EXEC,D.u64[threadID] = (S0 <= S1); D = VCC in VOPC encoding.
22903 void
22904 Inst_VOP3__V_CMPX_LE_U64::execute(GPUDynInstPtr gpuDynInst)
22905 {
22906 Wavefront *wf = gpuDynInst->wavefront();
22907 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22908 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22909 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22910
22911 src0.readSrc();
22912 src1.readSrc();
22913
22914 /**
22915 * input modifiers are supported by FP operations only
22916 */
22917 assert(!(instData.ABS & 0x1));
22918 assert(!(instData.ABS & 0x2));
22919 assert(!(instData.ABS & 0x4));
22920 assert(!(extData.NEG & 0x1));
22921 assert(!(extData.NEG & 0x2));
22922 assert(!(extData.NEG & 0x4));
22923
22924 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22925 if (wf->execMask(lane)) {
22926 sdst.setBit(lane, src0[lane] <= src1[lane] ? 1 : 0);
22927 }
22928 }
22929
22930 wf->execMask() = sdst.rawData();
22931 sdst.write();
22932 }
22933
22934 Inst_VOP3__V_CMPX_GT_U64::Inst_VOP3__V_CMPX_GT_U64(
22935 InFmt_VOP3 *iFmt)
22936 : Inst_VOP3(iFmt, "v_cmpx_gt_u64", true)
22937 {
22938 setFlag(ALU);
22939 } // Inst_VOP3__V_CMPX_GT_U64
22940
22941 Inst_VOP3__V_CMPX_GT_U64::~Inst_VOP3__V_CMPX_GT_U64()
22942 {
22943 } // ~Inst_VOP3__V_CMPX_GT_U64
22944
22945 // EXEC,D.u64[threadID] = (S0 > S1); D = VCC in VOPC encoding.
22946 void
22947 Inst_VOP3__V_CMPX_GT_U64::execute(GPUDynInstPtr gpuDynInst)
22948 {
22949 Wavefront *wf = gpuDynInst->wavefront();
22950 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22951 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22952 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22953
22954 src0.readSrc();
22955 src1.readSrc();
22956
22957 /**
22958 * input modifiers are supported by FP operations only
22959 */
22960 assert(!(instData.ABS & 0x1));
22961 assert(!(instData.ABS & 0x2));
22962 assert(!(instData.ABS & 0x4));
22963 assert(!(extData.NEG & 0x1));
22964 assert(!(extData.NEG & 0x2));
22965 assert(!(extData.NEG & 0x4));
22966
22967 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
22968 if (wf->execMask(lane)) {
22969 sdst.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
22970 }
22971 }
22972
22973 wf->execMask() = sdst.rawData();
22974 sdst.write();
22975 }
22976
22977 Inst_VOP3__V_CMPX_NE_U64::Inst_VOP3__V_CMPX_NE_U64(
22978 InFmt_VOP3 *iFmt)
22979 : Inst_VOP3(iFmt, "v_cmpx_ne_u64", true)
22980 {
22981 setFlag(ALU);
22982 } // Inst_VOP3__V_CMPX_NE_U64
22983
22984 Inst_VOP3__V_CMPX_NE_U64::~Inst_VOP3__V_CMPX_NE_U64()
22985 {
22986 } // ~Inst_VOP3__V_CMPX_NE_U64
22987
22988 // EXEC,D.u64[threadID] = (S0 <> S1); D = VCC in VOPC encoding.
22989 void
22990 Inst_VOP3__V_CMPX_NE_U64::execute(GPUDynInstPtr gpuDynInst)
22991 {
22992 Wavefront *wf = gpuDynInst->wavefront();
22993 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
22994 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
22995 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
22996
22997 src0.readSrc();
22998 src1.readSrc();
22999
23000 /**
23001 * input modifiers are supported by FP operations only
23002 */
23003 assert(!(instData.ABS & 0x1));
23004 assert(!(instData.ABS & 0x2));
23005 assert(!(instData.ABS & 0x4));
23006 assert(!(extData.NEG & 0x1));
23007 assert(!(extData.NEG & 0x2));
23008 assert(!(extData.NEG & 0x4));
23009
23010 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23011 if (wf->execMask(lane)) {
23012 sdst.setBit(lane, src0[lane] != src1[lane] ? 1 : 0);
23013 }
23014 }
23015
23016 wf->execMask() = sdst.rawData();
23017 sdst.write();
23018 }
23019
23020 Inst_VOP3__V_CMPX_GE_U64::Inst_VOP3__V_CMPX_GE_U64(
23021 InFmt_VOP3 *iFmt)
23022 : Inst_VOP3(iFmt, "v_cmpx_ge_u64", true)
23023 {
23024 setFlag(ALU);
23025 } // Inst_VOP3__V_CMPX_GE_U64
23026
23027 Inst_VOP3__V_CMPX_GE_U64::~Inst_VOP3__V_CMPX_GE_U64()
23028 {
23029 } // ~Inst_VOP3__V_CMPX_GE_U64
23030
23031 // EXEC,D.u64[threadID] = (S0 >= S1); D = VCC in VOPC encoding.
23032 void
23033 Inst_VOP3__V_CMPX_GE_U64::execute(GPUDynInstPtr gpuDynInst)
23034 {
23035 Wavefront *wf = gpuDynInst->wavefront();
23036 ConstVecOperandU64 src0(gpuDynInst, extData.SRC0);
23037 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
23038 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
23039
23040 src0.readSrc();
23041 src1.readSrc();
23042
23043 /**
23044 * input modifiers are supported by FP operations only
23045 */
23046 assert(!(instData.ABS & 0x1));
23047 assert(!(instData.ABS & 0x2));
23048 assert(!(instData.ABS & 0x4));
23049 assert(!(extData.NEG & 0x1));
23050 assert(!(extData.NEG & 0x2));
23051 assert(!(extData.NEG & 0x4));
23052
23053 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23054 if (wf->execMask(lane)) {
23055 sdst.setBit(lane, src0[lane] >= src1[lane] ? 1 : 0);
23056 }
23057 }
23058
23059 wf->execMask() = sdst.rawData();
23060 sdst.write();
23061 }
23062
23063 Inst_VOP3__V_CMPX_T_U64::Inst_VOP3__V_CMPX_T_U64(
23064 InFmt_VOP3 *iFmt)
23065 : Inst_VOP3(iFmt, "v_cmpx_t_u64", true)
23066 {
23067 setFlag(ALU);
23068 } // Inst_VOP3__V_CMPX_T_U64
23069
23070 Inst_VOP3__V_CMPX_T_U64::~Inst_VOP3__V_CMPX_T_U64()
23071 {
23072 } // ~Inst_VOP3__V_CMPX_T_U64
23073
23074 // EXEC,D.u64[threadID] = 1; D = VCC in VOPC encoding.
23075 void
23076 Inst_VOP3__V_CMPX_T_U64::execute(GPUDynInstPtr gpuDynInst)
23077 {
23078 Wavefront *wf = gpuDynInst->wavefront();
23079 ScalarOperandU64 sdst(gpuDynInst, instData.VDST);
23080
23081 /**
23082 * input modifiers are supported by FP operations only
23083 */
23084 assert(!(instData.ABS & 0x1));
23085 assert(!(instData.ABS & 0x2));
23086 assert(!(instData.ABS & 0x4));
23087 assert(!(extData.NEG & 0x1));
23088 assert(!(extData.NEG & 0x2));
23089 assert(!(extData.NEG & 0x4));
23090
23091 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23092 if (wf->execMask(lane)) {
23093 sdst.setBit(lane, 1);
23094 }
23095 }
23096
23097 wf->execMask() = sdst.rawData();
23098 sdst.write();
23099 }
23100
23101 Inst_VOP3__V_CNDMASK_B32::Inst_VOP3__V_CNDMASK_B32(InFmt_VOP3 *iFmt)
23102 : Inst_VOP3(iFmt, "v_cndmask_b32", false)
23103 {
23104 setFlag(ALU);
23105 setFlag(ReadsVCC);
23106 } // Inst_VOP3__V_CNDMASK_B32
23107
23108 Inst_VOP3__V_CNDMASK_B32::~Inst_VOP3__V_CNDMASK_B32()
23109 {
23110 } // ~Inst_VOP3__V_CNDMASK_B32
23111
23112 // D.u = (VCC[i] ? S1.u : S0.u) (i = threadID in wave); VOP3: specify VCC
23113 // as a scalar GPR in S2.
23114 void
23115 Inst_VOP3__V_CNDMASK_B32::execute(GPUDynInstPtr gpuDynInst)
23116 {
23117 Wavefront *wf = gpuDynInst->wavefront();
23118 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23119 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23120 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
23121 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23122
23123 src0.readSrc();
23124 src1.readSrc();
23125 vcc.read();
23126
23127 /**
23128 * input modifiers are supported by FP operations only
23129 */
23130 assert(!(instData.ABS & 0x1));
23131 assert(!(instData.ABS & 0x2));
23132 assert(!(instData.ABS & 0x4));
23133 assert(!(extData.NEG & 0x1));
23134 assert(!(extData.NEG & 0x2));
23135 assert(!(extData.NEG & 0x4));
23136
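        // per-lane select: bit 'lane' of the scalar condition (VCC, or
        // the SGPR pair named by SRC2 in the VOP3 encoding) picks src1
        // when set and src0 when clear.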
23137 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23138 if (wf->execMask(lane)) {
23139 vdst[lane] = bits(vcc.rawData(), lane)
23140 ? src1[lane] : src0[lane];
23141 }
23142 }
23143
23144 vdst.write();
23145 }
23146
23147 Inst_VOP3__V_ADD_F32::Inst_VOP3__V_ADD_F32(InFmt_VOP3 *iFmt)
23148 : Inst_VOP3(iFmt, "v_add_f32", false)
23149 {
23150 setFlag(ALU);
23151 setFlag(F32);
23152 } // Inst_VOP3__V_ADD_F32
23153
23154 Inst_VOP3__V_ADD_F32::~Inst_VOP3__V_ADD_F32()
23155 {
23156 } // ~Inst_VOP3__V_ADD_F32
23157
23158 // D.f = S0.f + S1.f.
23159 void
23160 Inst_VOP3__V_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
23161 {
23162 Wavefront *wf = gpuDynInst->wavefront();
23163 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23164 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23165 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23166
23167 src0.readSrc();
23168 src1.readSrc();
23169
23170 if (instData.ABS & 0x1) {
23171 src0.absModifier();
23172 }
23173
23174 if (instData.ABS & 0x2) {
23175 src1.absModifier();
23176 }
23177
23178 if (extData.NEG & 0x1) {
23179 src0.negModifier();
23180 }
23181
23182 if (extData.NEG & 0x2) {
23183 src1.negModifier();
23184 }
23185
23186 /**
23187 * input modifiers are supported by FP operations only
23188 */
23189 assert(!(instData.ABS & 0x4));
23190 assert(!(extData.NEG & 0x4));
23191
23192 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23193 if (wf->execMask(lane)) {
23194 vdst[lane] = src0[lane] + src1[lane];
23195 }
23196 }
23197
23198 vdst.write();
23199 }
23200
23201 Inst_VOP3__V_SUB_F32::Inst_VOP3__V_SUB_F32(InFmt_VOP3 *iFmt)
23202 : Inst_VOP3(iFmt, "v_sub_f32", false)
23203 {
23204 setFlag(ALU);
23205 setFlag(F32);
23206 } // Inst_VOP3__V_SUB_F32
23207
23208 Inst_VOP3__V_SUB_F32::~Inst_VOP3__V_SUB_F32()
23209 {
23210 } // ~Inst_VOP3__V_SUB_F32
23211
23212 // D.f = S0.f - S1.f.
23213 void
23214 Inst_VOP3__V_SUB_F32::execute(GPUDynInstPtr gpuDynInst)
23215 {
23216 Wavefront *wf = gpuDynInst->wavefront();
23217 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23218 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23219 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23220
23221 src0.readSrc();
23222 src1.readSrc();
23223
23224 if (instData.ABS & 0x1) {
23225 src0.absModifier();
23226 }
23227
23228 if (instData.ABS & 0x2) {
23229 src1.absModifier();
23230 }
23231
23232 if (extData.NEG & 0x1) {
23233 src0.negModifier();
23234 }
23235
23236 if (extData.NEG & 0x2) {
23237 src1.negModifier();
23238 }
23239
23240 /**
23241 * input modifiers are supported by FP operations only
23242 */
23243 assert(!(instData.ABS & 0x4));
23244 assert(!(extData.NEG & 0x4));
23245
23246 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23247 if (wf->execMask(lane)) {
23248 vdst[lane] = src0[lane] - src1[lane];
23249 }
23250 }
23251
23252 vdst.write();
23253 }
23254
23255 Inst_VOP3__V_SUBREV_F32::Inst_VOP3__V_SUBREV_F32(InFmt_VOP3 *iFmt)
23256 : Inst_VOP3(iFmt, "v_subrev_f32", false)
23257 {
23258 setFlag(ALU);
23259 setFlag(F32);
23260 } // Inst_VOP3__V_SUBREV_F32
23261
23262 Inst_VOP3__V_SUBREV_F32::~Inst_VOP3__V_SUBREV_F32()
23263 {
23264 } // ~Inst_VOP3__V_SUBREV_F32
23265
23266 // D.f = S1.f - S0.f.
23267 void
23268 Inst_VOP3__V_SUBREV_F32::execute(GPUDynInstPtr gpuDynInst)
23269 {
23270 Wavefront *wf = gpuDynInst->wavefront();
23271 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23272 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23273 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23274
23275 src0.readSrc();
23276 src1.readSrc();
23277
23278 if (instData.ABS & 0x1) {
23279 src0.absModifier();
23280 }
23281
23282 if (instData.ABS & 0x2) {
23283 src1.absModifier();
23284 }
23285
23286 if (extData.NEG & 0x1) {
23287 src0.negModifier();
23288 }
23289
23290 if (extData.NEG & 0x2) {
23291 src1.negModifier();
23292 }
23293
23294 /**
23295 * input modifiers are supported by FP operations only
23296 */
23297 assert(!(instData.ABS & 0x4));
23298 assert(!(extData.NEG & 0x4));
23299
23300 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23301 if (wf->execMask(lane)) {
23302 vdst[lane] = src1[lane] - src0[lane];
23303 }
23304 }
23305
23306 vdst.write();
23307 }
23308
23309 Inst_VOP3__V_MUL_LEGACY_F32::Inst_VOP3__V_MUL_LEGACY_F32(InFmt_VOP3 *iFmt)
23310 : Inst_VOP3(iFmt, "v_mul_legacy_f32", false)
23311 {
23312 setFlag(ALU);
23313 setFlag(F32);
23314 } // Inst_VOP3__V_MUL_LEGACY_F32
23315
23316 Inst_VOP3__V_MUL_LEGACY_F32::~Inst_VOP3__V_MUL_LEGACY_F32()
23317 {
23318 } // ~Inst_VOP3__V_MUL_LEGACY_F32
23319
23320 // D.f = S0.f * S1.f
23321 void
23322 Inst_VOP3__V_MUL_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
23323 {
23324 Wavefront *wf = gpuDynInst->wavefront();
23325 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23326 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23327 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23328
23329 src0.readSrc();
23330 src1.readSrc();
23331
23332 if (instData.ABS & 0x1) {
23333 src0.absModifier();
23334 }
23335
23336 if (instData.ABS & 0x2) {
23337 src1.absModifier();
23338 }
23339
23340 if (extData.NEG & 0x1) {
23341 src0.negModifier();
23342 }
23343
23344 if (extData.NEG & 0x2) {
23345 src1.negModifier();
23346 }
23347
23348 /**
23349 * input modifiers are supported by FP operations only
23350 */
23351 assert(!(instData.ABS & 0x4));
23352 assert(!(extData.NEG & 0x4));
23353
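        // special cases of the legacy (DX9-style) multiply as modeled
        // here: NaN inputs propagate; a zero or denormal src0 yields a
        // signed zero unless src1 is infinite (NaN); an infinite src0
        // yields a signed infinity unless src1 is zero/denormal (NaN).
        // In each case the result sign is the XOR of the operand signs.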
23354 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23355 if (wf->execMask(lane)) {
23356 if (std::isnan(src0[lane]) ||
23357 std::isnan(src1[lane])) {
23358 vdst[lane] = NAN;
23359 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23360 std::fpclassify(src0[lane]) == FP_ZERO) &&
23361 !std::signbit(src0[lane])) {
23362 if (std::isinf(src1[lane])) {
23363 vdst[lane] = NAN;
23364 } else if (!std::signbit(src1[lane])) {
23365 vdst[lane] = +0.0;
23366 } else {
23367 vdst[lane] = -0.0;
23368 }
23369 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23370 std::fpclassify(src0[lane]) == FP_ZERO) &&
23371 std::signbit(src0[lane])) {
23372 if (std::isinf(src1[lane])) {
23373 vdst[lane] = NAN;
23374 } else if (std::signbit(src1[lane])) {
23375 vdst[lane] = +0.0;
23376 } else {
23377 vdst[lane] = -0.0;
23378 }
23379 } else if (std::isinf(src0[lane]) &&
23380 !std::signbit(src0[lane])) {
23381 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23382 std::fpclassify(src1[lane]) == FP_ZERO) {
23383 vdst[lane] = NAN;
23384 } else if (!std::signbit(src1[lane])) {
23385 vdst[lane] = +INFINITY;
23386 } else {
23387 vdst[lane] = -INFINITY;
23388 }
23389 } else if (std::isinf(src0[lane]) &&
23390 std::signbit(src0[lane])) {
23391 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23392 std::fpclassify(src1[lane]) == FP_ZERO) {
23393 vdst[lane] = NAN;
23394 } else if (std::signbit(src1[lane])) {
23395 vdst[lane] = +INFINITY;
23396 } else {
23397 vdst[lane] = -INFINITY;
23398 }
23399 } else {
23400 vdst[lane] = src0[lane] * src1[lane];
23401 }
23402 }
23403 }
23404
23405 vdst.write();
23406 }
23407
23408 Inst_VOP3__V_MUL_F32::Inst_VOP3__V_MUL_F32(InFmt_VOP3 *iFmt)
23409 : Inst_VOP3(iFmt, "v_mul_f32", false)
23410 {
23411 setFlag(ALU);
23412 setFlag(F32);
23413 } // Inst_VOP3__V_MUL_F32
23414
23415 Inst_VOP3__V_MUL_F32::~Inst_VOP3__V_MUL_F32()
23416 {
23417 } // ~Inst_VOP3__V_MUL_F32
23418
23419 // D.f = S0.f * S1.f.
23420 void
23421 Inst_VOP3__V_MUL_F32::execute(GPUDynInstPtr gpuDynInst)
23422 {
23423 Wavefront *wf = gpuDynInst->wavefront();
23424 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23425 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23426 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23427
23428 src0.readSrc();
23429 src1.readSrc();
23430
23431 if (instData.ABS & 0x1) {
23432 src0.absModifier();
23433 }
23434
23435 if (instData.ABS & 0x2) {
23436 src1.absModifier();
23437 }
23438
23439 if (extData.NEG & 0x1) {
23440 src0.negModifier();
23441 }
23442
23443 if (extData.NEG & 0x2) {
23444 src1.negModifier();
23445 }
23446
23447 /**
23448 * input modifiers are supported by FP operations only
23449 */
23450 assert(!(instData.ABS & 0x4));
23451 assert(!(extData.NEG & 0x4));
23452
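        // same special-case chain as v_mul_legacy_f32 above: NaN
        // propagation, 0 * inf -> NaN, and a zero/denormal src0
        // producing a signed zero.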
23453 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23454 if (wf->execMask(lane)) {
23455 if (std::isnan(src0[lane]) ||
23456 std::isnan(src1[lane])) {
23457 vdst[lane] = NAN;
23458 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23459 std::fpclassify(src0[lane]) == FP_ZERO) &&
23460 !std::signbit(src0[lane])) {
23461 if (std::isinf(src1[lane])) {
23462 vdst[lane] = NAN;
23463 } else if (!std::signbit(src1[lane])) {
23464 vdst[lane] = +0.0;
23465 } else {
23466 vdst[lane] = -0.0;
23467 }
23468 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
23469 std::fpclassify(src0[lane]) == FP_ZERO) &&
23470 std::signbit(src0[lane])) {
23471 if (std::isinf(src1[lane])) {
23472 vdst[lane] = NAN;
23473 } else if (std::signbit(src1[lane])) {
23474 vdst[lane] = +0.0;
23475 } else {
23476 vdst[lane] = -0.0;
23477 }
23478 } else if (std::isinf(src0[lane]) &&
23479 !std::signbit(src0[lane])) {
23480 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23481 std::fpclassify(src1[lane]) == FP_ZERO) {
23482 vdst[lane] = NAN;
23483 } else if (!std::signbit(src1[lane])) {
23484 vdst[lane] = +INFINITY;
23485 } else {
23486 vdst[lane] = -INFINITY;
23487 }
23488 } else if (std::isinf(src0[lane]) &&
23489 std::signbit(src0[lane])) {
23490 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
23491 std::fpclassify(src1[lane]) == FP_ZERO) {
23492 vdst[lane] = NAN;
23493 } else if (std::signbit(src1[lane])) {
23494 vdst[lane] = +INFINITY;
23495 } else {
23496 vdst[lane] = -INFINITY;
23497 }
23498 } else {
23499 vdst[lane] = src0[lane] * src1[lane];
23500 }
23501 }
23502 }
23503
23504 vdst.write();
23505 }
23506
23507 Inst_VOP3__V_MUL_I32_I24::Inst_VOP3__V_MUL_I32_I24(InFmt_VOP3 *iFmt)
23508 : Inst_VOP3(iFmt, "v_mul_i32_i24", false)
23509 {
23510 setFlag(ALU);
23511 } // Inst_VOP3__V_MUL_I32_I24
23512
23513 Inst_VOP3__V_MUL_I32_I24::~Inst_VOP3__V_MUL_I32_I24()
23514 {
23515 } // ~Inst_VOP3__V_MUL_I32_I24
23516
23517 // D.i = S0.i[23:0] * S1.i[23:0].
23518 void
23519 Inst_VOP3__V_MUL_I32_I24::execute(GPUDynInstPtr gpuDynInst)
23520 {
23521 Wavefront *wf = gpuDynInst->wavefront();
23522 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23523 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23524 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23525
23526 src0.readSrc();
23527         src1.readSrc();
23528
23529 /**
23530 * input modifiers are supported by FP operations only
23531 */
23532 assert(!(instData.ABS & 0x1));
23533 assert(!(instData.ABS & 0x2));
23534 assert(!(instData.ABS & 0x4));
23535 assert(!(extData.NEG & 0x1));
23536 assert(!(extData.NEG & 0x2));
23537 assert(!(extData.NEG & 0x4));
23538
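        // each lane multiplies the sign-extended low 24 bits of the
        // sources; the product is truncated to 32 bits on assignment
        // to the 32-bit destination.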
23539 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23540 if (wf->execMask(lane)) {
23541 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
23542 * sext<24>(bits(src1[lane], 23, 0));
23543 }
23544 }
23545
23546 vdst.write();
23547 }
23548
23549 Inst_VOP3__V_MUL_HI_I32_I24::Inst_VOP3__V_MUL_HI_I32_I24(InFmt_VOP3 *iFmt)
23550 : Inst_VOP3(iFmt, "v_mul_hi_i32_i24", false)
23551 {
23552 setFlag(ALU);
23553 } // Inst_VOP3__V_MUL_HI_I32_I24
23554
23555 Inst_VOP3__V_MUL_HI_I32_I24::~Inst_VOP3__V_MUL_HI_I32_I24()
23556 {
23557 } // ~Inst_VOP3__V_MUL_HI_I32_I24
23558
23559 // D.i = (S0.i[23:0] * S1.i[23:0]) >> 32.
23560 void
23561 Inst_VOP3__V_MUL_HI_I32_I24::execute(GPUDynInstPtr gpuDynInst)
23562 {
23563 Wavefront *wf = gpuDynInst->wavefront();
23564 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23565 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23566 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23567
23568 src0.readSrc();
23569 src1.readSrc();
23570
23571 /**
23572 * input modifiers are supported by FP operations only
23573 */
23574 assert(!(instData.ABS & 0x1));
23575 assert(!(instData.ABS & 0x2));
23576 assert(!(instData.ABS & 0x4));
23577 assert(!(extData.NEG & 0x1));
23578 assert(!(extData.NEG & 0x2));
23579 assert(!(extData.NEG & 0x4));
23580
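        // widen the sign-extended 24-bit operands to 64 bits so the
        // full 48-bit product is formed before taking bits [63:32].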
23581 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23582 if (wf->execMask(lane)) {
23583 VecElemI64 tmp_src0
23584 = (VecElemI64)sext<24>(bits(src0[lane], 23, 0));
23585 VecElemI64 tmp_src1
23586 = (VecElemI64)sext<24>(bits(src1[lane], 23, 0));
23587
23588 vdst[lane] = (VecElemI32)((tmp_src0 * tmp_src1) >> 32);
23589 }
23590 }
23591
23592 vdst.write();
23593 }
23594
23595 Inst_VOP3__V_MUL_U32_U24::Inst_VOP3__V_MUL_U32_U24(InFmt_VOP3 *iFmt)
23596 : Inst_VOP3(iFmt, "v_mul_u32_u24", false)
23597 {
23598 setFlag(ALU);
23599 } // Inst_VOP3__V_MUL_U32_U24
23600
23601 Inst_VOP3__V_MUL_U32_U24::~Inst_VOP3__V_MUL_U32_U24()
23602 {
23603 } // ~Inst_VOP3__V_MUL_U32_U24
23604
23605 // D.u = S0.u[23:0] * S1.u[23:0].
23606 void
23607 Inst_VOP3__V_MUL_U32_U24::execute(GPUDynInstPtr gpuDynInst)
23608 {
23609 Wavefront *wf = gpuDynInst->wavefront();
23610 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23611 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23612 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23613
23614 src0.readSrc();
23615 src1.readSrc();
23616
23617 /**
23618 * input modifiers are supported by FP operations only
23619 */
23620 assert(!(instData.ABS & 0x1));
23621 assert(!(instData.ABS & 0x2));
23622 assert(!(instData.ABS & 0x4));
23623 assert(!(extData.NEG & 0x1));
23624 assert(!(extData.NEG & 0x2));
23625 assert(!(extData.NEG & 0x4));
23626
23627 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23628 if (wf->execMask(lane)) {
23629 vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0);
23630 }
23631 }
23632
23633 vdst.write();
23634 }
23635
23636 Inst_VOP3__V_MUL_HI_U32_U24::Inst_VOP3__V_MUL_HI_U32_U24(InFmt_VOP3 *iFmt)
23637 : Inst_VOP3(iFmt, "v_mul_hi_u32_u24", false)
23638 {
23639 setFlag(ALU);
23640 } // Inst_VOP3__V_MUL_HI_U32_U24
23641
23642 Inst_VOP3__V_MUL_HI_U32_U24::~Inst_VOP3__V_MUL_HI_U32_U24()
23643 {
23644 } // ~Inst_VOP3__V_MUL_HI_U32_U24
23645
23646     // D.u = (S0.u[23:0] * S1.u[23:0]) >> 32.
23647 void
23648 Inst_VOP3__V_MUL_HI_U32_U24::execute(GPUDynInstPtr gpuDynInst)
23649 {
23650 Wavefront *wf = gpuDynInst->wavefront();
23651 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23652 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23653 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23654
23655 src0.readSrc();
23656 src1.readSrc();
23657
23658 /**
23659 * input modifiers are supported by FP operations only
23660 */
23661 assert(!(instData.ABS & 0x1));
23662 assert(!(instData.ABS & 0x2));
23663 assert(!(instData.ABS & 0x4));
23664 assert(!(extData.NEG & 0x1));
23665 assert(!(extData.NEG & 0x2));
23666 assert(!(extData.NEG & 0x4));
23667
23668 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23669 if (wf->execMask(lane)) {
23670 VecElemU64 tmp_src0 = (VecElemU64)bits(src0[lane], 23, 0);
23671 VecElemU64 tmp_src1 = (VecElemU64)bits(src1[lane], 23, 0);
23672 vdst[lane] = (VecElemU32)((tmp_src0 * tmp_src1) >> 32);
23673 }
23674 }
23675
23676 vdst.write();
23677 }
23678
23679 Inst_VOP3__V_MIN_F32::Inst_VOP3__V_MIN_F32(InFmt_VOP3 *iFmt)
23680 : Inst_VOP3(iFmt, "v_min_f32", false)
23681 {
23682 setFlag(ALU);
23683 setFlag(F32);
23684 } // Inst_VOP3__V_MIN_F32
23685
23686 Inst_VOP3__V_MIN_F32::~Inst_VOP3__V_MIN_F32()
23687 {
23688 } // ~Inst_VOP3__V_MIN_F32
23689
23690 // D.f = (S0.f < S1.f ? S0.f : S1.f).
23691 void
23692 Inst_VOP3__V_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
23693 {
23694 Wavefront *wf = gpuDynInst->wavefront();
23695 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23696 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23697 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23698
23699 src0.readSrc();
23700 src1.readSrc();
23701
23702 if (instData.ABS & 0x1) {
23703 src0.absModifier();
23704 }
23705
23706 if (instData.ABS & 0x2) {
23707 src1.absModifier();
23708 }
23709
23710 if (extData.NEG & 0x1) {
23711 src0.negModifier();
23712 }
23713
23714 if (extData.NEG & 0x2) {
23715 src1.negModifier();
23716 }
23717
23718 /**
23719 * input modifiers are supported by FP operations only
23720 */
23721 assert(!(instData.ABS & 0x4));
23722 assert(!(extData.NEG & 0x4));
23723
23724 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23725 if (wf->execMask(lane)) {
23726 vdst[lane] = std::fmin(src0[lane], src1[lane]);
23727 }
23728 }
23729
23730 vdst.write();
23731 }
23732
23733 Inst_VOP3__V_MAX_F32::Inst_VOP3__V_MAX_F32(InFmt_VOP3 *iFmt)
23734 : Inst_VOP3(iFmt, "v_max_f32", false)
23735 {
23736 setFlag(ALU);
23737 setFlag(F32);
23738 } // Inst_VOP3__V_MAX_F32
23739
23740 Inst_VOP3__V_MAX_F32::~Inst_VOP3__V_MAX_F32()
23741 {
23742 } // ~Inst_VOP3__V_MAX_F32
23743
23744 // D.f = (S0.f >= S1.f ? S0.f : S1.f).
23745 void
23746 Inst_VOP3__V_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
23747 {
23748 Wavefront *wf = gpuDynInst->wavefront();
23749 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
23750 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
23751 VecOperandF32 vdst(gpuDynInst, instData.VDST);
23752
23753 src0.readSrc();
23754 src1.readSrc();
23755
23756 if (instData.ABS & 0x1) {
23757 src0.absModifier();
23758 }
23759
23760 if (instData.ABS & 0x2) {
23761 src1.absModifier();
23762 }
23763
23764 if (extData.NEG & 0x1) {
23765 src0.negModifier();
23766 }
23767
23768 if (extData.NEG & 0x2) {
23769 src1.negModifier();
23770 }
23771
23772 /**
23773 * input modifiers are supported by FP operations only
23774 */
23775 assert(!(instData.ABS & 0x4));
23776 assert(!(extData.NEG & 0x4));
23777
23778 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23779 if (wf->execMask(lane)) {
23780 vdst[lane] = std::fmax(src0[lane], src1[lane]);
23781 }
23782 }
23783
23784 vdst.write();
23785 }
23786
23787 Inst_VOP3__V_MIN_I32::Inst_VOP3__V_MIN_I32(InFmt_VOP3 *iFmt)
23788 : Inst_VOP3(iFmt, "v_min_i32", false)
23789 {
23790 setFlag(ALU);
23791 } // Inst_VOP3__V_MIN_I32
23792
23793 Inst_VOP3__V_MIN_I32::~Inst_VOP3__V_MIN_I32()
23794 {
23795 } // ~Inst_VOP3__V_MIN_I32
23796
23797 // D.i = min(S0.i, S1.i).
23798 void
23799 Inst_VOP3__V_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
23800 {
23801 Wavefront *wf = gpuDynInst->wavefront();
23802 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23803 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23804 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23805
23806 src0.readSrc();
23807 src1.readSrc();
23808
23809 /**
23810 * input modifiers are supported by FP operations only
23811 */
23812 assert(!(instData.ABS & 0x1));
23813 assert(!(instData.ABS & 0x2));
23814 assert(!(instData.ABS & 0x4));
23815 assert(!(extData.NEG & 0x1));
23816 assert(!(extData.NEG & 0x2));
23817 assert(!(extData.NEG & 0x4));
23818
23819 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23820 if (wf->execMask(lane)) {
23821 vdst[lane] = std::min(src0[lane], src1[lane]);
23822 }
23823 }
23824
23825 vdst.write();
23826 }
23827
23828 Inst_VOP3__V_MAX_I32::Inst_VOP3__V_MAX_I32(InFmt_VOP3 *iFmt)
23829 : Inst_VOP3(iFmt, "v_max_i32", false)
23830 {
23831 setFlag(ALU);
23832 } // Inst_VOP3__V_MAX_I32
23833
23834 Inst_VOP3__V_MAX_I32::~Inst_VOP3__V_MAX_I32()
23835 {
23836 } // ~Inst_VOP3__V_MAX_I32
23837
23838 // D.i = max(S0.i, S1.i).
23839 void
23840 Inst_VOP3__V_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
23841 {
23842 Wavefront *wf = gpuDynInst->wavefront();
23843 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
23844 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
23845 VecOperandI32 vdst(gpuDynInst, instData.VDST);
23846
23847 src0.readSrc();
23848 src1.readSrc();
23849
23850 /**
23851 * input modifiers are supported by FP operations only
23852 */
23853 assert(!(instData.ABS & 0x1));
23854 assert(!(instData.ABS & 0x2));
23855 assert(!(instData.ABS & 0x4));
23856 assert(!(extData.NEG & 0x1));
23857 assert(!(extData.NEG & 0x2));
23858 assert(!(extData.NEG & 0x4));
23859
23860 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23861 if (wf->execMask(lane)) {
23862 vdst[lane] = std::max(src0[lane], src1[lane]);
23863 }
23864 }
23865
23866 vdst.write();
23867 }
23868
23869 Inst_VOP3__V_MIN_U32::Inst_VOP3__V_MIN_U32(InFmt_VOP3 *iFmt)
23870 : Inst_VOP3(iFmt, "v_min_u32", false)
23871 {
23872 setFlag(ALU);
23873 } // Inst_VOP3__V_MIN_U32
23874
23875 Inst_VOP3__V_MIN_U32::~Inst_VOP3__V_MIN_U32()
23876 {
23877 } // ~Inst_VOP3__V_MIN_U32
23878
23879 // D.u = min(S0.u, S1.u).
23880 void
23881 Inst_VOP3__V_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
23882 {
23883 Wavefront *wf = gpuDynInst->wavefront();
23884 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23885 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23886 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23887
23888 src0.readSrc();
23889 src1.readSrc();
23890
23891 /**
23892 * input modifiers are supported by FP operations only
23893 */
23894 assert(!(instData.ABS & 0x1));
23895 assert(!(instData.ABS & 0x2));
23896 assert(!(instData.ABS & 0x4));
23897 assert(!(extData.NEG & 0x1));
23898 assert(!(extData.NEG & 0x2));
23899 assert(!(extData.NEG & 0x4));
23900
23901 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23902 if (wf->execMask(lane)) {
23903 vdst[lane] = std::min(src0[lane], src1[lane]);
23904 }
23905 }
23906
23907 vdst.write();
23908 }
23909
23910 Inst_VOP3__V_MAX_U32::Inst_VOP3__V_MAX_U32(InFmt_VOP3 *iFmt)
23911 : Inst_VOP3(iFmt, "v_max_u32", false)
23912 {
23913 setFlag(ALU);
23914 } // Inst_VOP3__V_MAX_U32
23915
23916 Inst_VOP3__V_MAX_U32::~Inst_VOP3__V_MAX_U32()
23917 {
23918 } // ~Inst_VOP3__V_MAX_U32
23919
23920 // D.u = max(S0.u, S1.u).
23921 void
23922 Inst_VOP3__V_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
23923 {
23924 Wavefront *wf = gpuDynInst->wavefront();
23925 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23926 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23927 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23928
23929 src0.readSrc();
23930 src1.readSrc();
23931
23932 /**
23933 * input modifiers are supported by FP operations only
23934 */
23935 assert(!(instData.ABS & 0x1));
23936 assert(!(instData.ABS & 0x2));
23937 assert(!(instData.ABS & 0x4));
23938 assert(!(extData.NEG & 0x1));
23939 assert(!(extData.NEG & 0x2));
23940 assert(!(extData.NEG & 0x4));
23941
23942 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23943 if (wf->execMask(lane)) {
23944 vdst[lane] = std::max(src0[lane], src1[lane]);
23945 }
23946 }
23947
23948 vdst.write();
23949 }
23950
23951 Inst_VOP3__V_LSHRREV_B32::Inst_VOP3__V_LSHRREV_B32(InFmt_VOP3 *iFmt)
23952 : Inst_VOP3(iFmt, "v_lshrrev_b32", false)
23953 {
23954 setFlag(ALU);
23955 } // Inst_VOP3__V_LSHRREV_B32
23956
23957 Inst_VOP3__V_LSHRREV_B32::~Inst_VOP3__V_LSHRREV_B32()
23958 {
23959 } // ~Inst_VOP3__V_LSHRREV_B32
23960
23961 // D.u = S1.u >> S0.u[4:0].
23962 // The vacated bits are set to zero.
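    // Note the "rev" operand order shared by the *rev shifts: the shift
    // amount comes from S0 and the value being shifted from S1.
    // Illustrative example (not from the ISA manual): src0 = 0x21 and
    // src1 = 0x80000000 use only src0[4:0] = 1, so D.u = 0x40000000.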
23963 void
23964 Inst_VOP3__V_LSHRREV_B32::execute(GPUDynInstPtr gpuDynInst)
23965 {
23966 Wavefront *wf = gpuDynInst->wavefront();
23967 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
23968 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
23969 VecOperandU32 vdst(gpuDynInst, instData.VDST);
23970
23971 src0.readSrc();
23972 src1.readSrc();
23973
23974 /**
23975 * input modifiers are supported by FP operations only
23976 */
23977 assert(!(instData.ABS & 0x1));
23978 assert(!(instData.ABS & 0x2));
23979 assert(!(instData.ABS & 0x4));
23980 assert(!(extData.NEG & 0x1));
23981 assert(!(extData.NEG & 0x2));
23982 assert(!(extData.NEG & 0x4));
23983
23984 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
23985 if (wf->execMask(lane)) {
23986 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
23987 }
23988 }
23989
23990 vdst.write();
23991 }
23992
23993 Inst_VOP3__V_ASHRREV_I32::Inst_VOP3__V_ASHRREV_I32(InFmt_VOP3 *iFmt)
23994 : Inst_VOP3(iFmt, "v_ashrrev_i32", false)
23995 {
23996 setFlag(ALU);
23997 } // Inst_VOP3__V_ASHRREV_I32
23998
23999 Inst_VOP3__V_ASHRREV_I32::~Inst_VOP3__V_ASHRREV_I32()
24000 {
24001 } // ~Inst_VOP3__V_ASHRREV_I32
24002
24003 // D.i = signext(S1.i) >> S0.i[4:0].
24004 // The vacated bits are set to the sign bit of the input value.
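    // Illustrative example: src1 = 0x80000000 (INT_MIN) shifted by 4
    // yields 0xF8000000. On the two's-complement compilers gem5 targets,
    // >> on a negative signed value is an arithmetic shift, matching the
    // ISA's sign-fill semantics.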
24005 void
24006 Inst_VOP3__V_ASHRREV_I32::execute(GPUDynInstPtr gpuDynInst)
24007 {
24008 Wavefront *wf = gpuDynInst->wavefront();
24009 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24010 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
24011 VecOperandI32 vdst(gpuDynInst, instData.VDST);
24012
24013 src0.readSrc();
24014 src1.readSrc();
24015
24016 /**
24017 * input modifiers are supported by FP operations only
24018 */
24019 assert(!(instData.ABS & 0x1));
24020 assert(!(instData.ABS & 0x2));
24021 assert(!(instData.ABS & 0x4));
24022 assert(!(extData.NEG & 0x1));
24023 assert(!(extData.NEG & 0x2));
24024 assert(!(extData.NEG & 0x4));
24025
24026 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24027 if (wf->execMask(lane)) {
24028 vdst[lane] = src1[lane] >> bits(src0[lane], 4, 0);
24029 }
24030 }
24031
24032 vdst.write();
24033 }
24034
24035 Inst_VOP3__V_LSHLREV_B32::Inst_VOP3__V_LSHLREV_B32(InFmt_VOP3 *iFmt)
24036 : Inst_VOP3(iFmt, "v_lshlrev_b32", false)
24037 {
24038 setFlag(ALU);
24039 } // Inst_VOP3__V_LSHLREV_B32
24040
24041 Inst_VOP3__V_LSHLREV_B32::~Inst_VOP3__V_LSHLREV_B32()
24042 {
24043 } // ~Inst_VOP3__V_LSHLREV_B32
24044
24045 // D.u = S1.u << S0.u[4:0].
24046 void
24047 Inst_VOP3__V_LSHLREV_B32::execute(GPUDynInstPtr gpuDynInst)
24048 {
24049 Wavefront *wf = gpuDynInst->wavefront();
24050 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24051 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24052 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24053
24054 src0.readSrc();
24055 src1.readSrc();
24056
24057 /**
24058 * input modifiers are supported by FP operations only
24059 */
24060 assert(!(instData.ABS & 0x1));
24061 assert(!(instData.ABS & 0x2));
24062 assert(!(instData.ABS & 0x4));
24063 assert(!(extData.NEG & 0x1));
24064 assert(!(extData.NEG & 0x2));
24065 assert(!(extData.NEG & 0x4));
24066
24067 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24068 if (wf->execMask(lane)) {
24069 vdst[lane] = src1[lane] << bits(src0[lane], 4, 0);
24070 }
24071 }
24072
24073 vdst.write();
24074 }
24075
24076 Inst_VOP3__V_AND_B32::Inst_VOP3__V_AND_B32(InFmt_VOP3 *iFmt)
24077 : Inst_VOP3(iFmt, "v_and_b32", false)
24078 {
24079 setFlag(ALU);
24080 } // Inst_VOP3__V_AND_B32
24081
24082 Inst_VOP3__V_AND_B32::~Inst_VOP3__V_AND_B32()
24083 {
24084 } // ~Inst_VOP3__V_AND_B32
24085
24086 // D.u = S0.u & S1.u.
24087 // Input and output modifiers not supported.
24088 void
24089 Inst_VOP3__V_AND_B32::execute(GPUDynInstPtr gpuDynInst)
24090 {
24091 Wavefront *wf = gpuDynInst->wavefront();
24092 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24093 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24094 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24095
24096 src0.readSrc();
24097 src1.readSrc();
24098
24099 /**
24100 * input modifiers are supported by FP operations only
24101 */
24102 assert(!(instData.ABS & 0x1));
24103 assert(!(instData.ABS & 0x2));
24104 assert(!(instData.ABS & 0x4));
24105 assert(!(extData.NEG & 0x1));
24106 assert(!(extData.NEG & 0x2));
24107 assert(!(extData.NEG & 0x4));
24108
24109 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24110 if (wf->execMask(lane)) {
24111 vdst[lane] = src0[lane] & src1[lane];
24112 }
24113 }
24114
24115 vdst.write();
24116 }
24117
24118 Inst_VOP3__V_OR_B32::Inst_VOP3__V_OR_B32(InFmt_VOP3 *iFmt)
24119 : Inst_VOP3(iFmt, "v_or_b32", false)
24120 {
24121 setFlag(ALU);
24122 } // Inst_VOP3__V_OR_B32
24123
24124 Inst_VOP3__V_OR_B32::~Inst_VOP3__V_OR_B32()
24125 {
24126 } // ~Inst_VOP3__V_OR_B32
24127
24128 // D.u = S0.u | S1.u.
24129 // Input and output modifiers not supported.
24130 void
24131 Inst_VOP3__V_OR_B32::execute(GPUDynInstPtr gpuDynInst)
24132 {
24133 Wavefront *wf = gpuDynInst->wavefront();
24134 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24135 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24136 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24137
24138 src0.readSrc();
24139 src1.readSrc();
24140
24141 /**
24142 * input modifiers are supported by FP operations only
24143 */
24144 assert(!(instData.ABS & 0x1));
24145 assert(!(instData.ABS & 0x2));
24146 assert(!(instData.ABS & 0x4));
24147 assert(!(extData.NEG & 0x1));
24148 assert(!(extData.NEG & 0x2));
24149 assert(!(extData.NEG & 0x4));
24150
24151 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24152 if (wf->execMask(lane)) {
24153 vdst[lane] = src0[lane] | src1[lane];
24154 }
24155 }
24156
24157 vdst.write();
24158 }
24159
24160 Inst_VOP3__V_XOR_B32::Inst_VOP3__V_XOR_B32(InFmt_VOP3 *iFmt)
24161 : Inst_VOP3(iFmt, "v_xor_b32", false)
24162 {
24163 setFlag(ALU);
24164 } // Inst_VOP3__V_XOR_B32
24165
24166 Inst_VOP3__V_XOR_B32::~Inst_VOP3__V_XOR_B32()
24167 {
24168 } // ~Inst_VOP3__V_XOR_B32
24169
24170 // D.u = S0.u ^ S1.u.
24171 // Input and output modifiers not supported.
24172 void
24173 Inst_VOP3__V_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
24174 {
24175 Wavefront *wf = gpuDynInst->wavefront();
24176 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24177 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24178 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24179
24180 src0.readSrc();
24181 src1.readSrc();
24182
24183 /**
24184 * input modifiers are supported by FP operations only
24185 */
24186 assert(!(instData.ABS & 0x1));
24187 assert(!(instData.ABS & 0x2));
24188 assert(!(instData.ABS & 0x4));
24189 assert(!(extData.NEG & 0x1));
24190 assert(!(extData.NEG & 0x2));
24191 assert(!(extData.NEG & 0x4));
24192
24193 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24194 if (wf->execMask(lane)) {
24195 vdst[lane] = src0[lane] ^ src1[lane];
24196 }
24197 }
24198
24199 vdst.write();
24200 }
24201
24202 Inst_VOP3__V_MAC_F32::Inst_VOP3__V_MAC_F32(InFmt_VOP3 *iFmt)
24203 : Inst_VOP3(iFmt, "v_mac_f32", false)
24204 {
24205 setFlag(ALU);
24206 setFlag(F32);
24207 setFlag(MAC);
24208 } // Inst_VOP3__V_MAC_F32
24209
24210 Inst_VOP3__V_MAC_F32::~Inst_VOP3__V_MAC_F32()
24211 {
24212 } // ~Inst_VOP3__V_MAC_F32
24213
24214 // D.f = S0.f * S1.f + D.f.
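    // gem5 models the MAC with std::fma, which rounds the product and
    // sum once. Illustrative effect: fma(1e8f, 1e8f, -1e16f) yields the
    // nonzero rounding residue of the product, whereas a separate
    // multiply followed by an add would yield 0.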
24215 void
24216 Inst_VOP3__V_MAC_F32::execute(GPUDynInstPtr gpuDynInst)
24217 {
24218 Wavefront *wf = gpuDynInst->wavefront();
24219 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
24220 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
24221 VecOperandF32 vdst(gpuDynInst, instData.VDST);
24222
24223 src0.readSrc();
24224 src1.readSrc();
24225 vdst.read();
24226
24227 if (instData.ABS & 0x1) {
24228 src0.absModifier();
24229 }
24230
24231 if (instData.ABS & 0x2) {
24232 src1.absModifier();
24233 }
24234
24235 if (extData.NEG & 0x1) {
24236 src0.negModifier();
24237 }
24238
24239 if (extData.NEG & 0x2) {
24240 src1.negModifier();
24241 }
24242
24243 /**
24244 * input modifiers are supported by FP operations only
24245 */
24246 assert(!(instData.ABS & 0x4));
24247 assert(!(extData.NEG & 0x4));
24248
24249 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24250 if (wf->execMask(lane)) {
24251 vdst[lane] = std::fma(src0[lane], src1[lane], vdst[lane]);
24252 }
24253 }
24254
24255 vdst.write();
24256 }
24257
24258 Inst_VOP3__V_ADD_U32::Inst_VOP3__V_ADD_U32(InFmt_VOP3_SDST_ENC *iFmt)
24259 : Inst_VOP3_SDST_ENC(iFmt, "v_add_u32")
24260 {
24261 setFlag(ALU);
24262 setFlag(WritesVCC);
24263 } // Inst_VOP3__V_ADD_U32
24264
24265 Inst_VOP3__V_ADD_U32::~Inst_VOP3__V_ADD_U32()
24266 {
24267 } // ~Inst_VOP3__V_ADD_U32
24268
24269 // D.u = S0.u + S1.u;
    // VCC[threadId] = (S0.u + S1.u >= 0x100000000ULL ? 1 : 0) is an UNSIGNED
    // overflow or carry-out.
24272 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
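    // Worked example (illustrative): src0 = 0xFFFFFFFF and src1 = 0x2
    // produce vdst = 0x1 (modulo-2^32 wrap) and set the lane's carry
    // bit, because the 64-bit sum 0x100000001 is >= 0x100000000.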
24273 void
24274 Inst_VOP3__V_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
24275 {
24276 Wavefront *wf = gpuDynInst->wavefront();
24277 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24278 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24279 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24280 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
24281
24282 src0.readSrc();
24283 src1.readSrc();
24284
24285 /**
24286 * input modifiers are supported by FP operations only
24287 */
24288 assert(!(extData.NEG & 0x1));
24289 assert(!(extData.NEG & 0x2));
24290 assert(!(extData.NEG & 0x4));
24291
24292 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24293 if (wf->execMask(lane)) {
24294 vdst[lane] = src0[lane] + src1[lane];
24295 vcc.setBit(lane, ((VecElemU64)src0[lane]
24296 + (VecElemU64)src1[lane]) >= 0x100000000ULL ? 1 : 0);
24297 }
24298 }
24299
24300 vdst.write();
24301 vcc.write();
24302 }
24303
24304 Inst_VOP3__V_SUB_U32::Inst_VOP3__V_SUB_U32(InFmt_VOP3_SDST_ENC *iFmt)
24305 : Inst_VOP3_SDST_ENC(iFmt, "v_sub_u32")
24306 {
24307 setFlag(ALU);
24308 setFlag(WritesVCC);
24309 } // Inst_VOP3__V_SUB_U32
24310
24311 Inst_VOP3__V_SUB_U32::~Inst_VOP3__V_SUB_U32()
24312 {
24313 } // ~Inst_VOP3__V_SUB_U32
24314
24315 // D.u = S0.u - S1.u;
24316 // VCC[threadId] = (S1.u > S0.u ? 1 : 0) is an UNSIGNED overflow or
24317 // carry-out.
24318 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
24319 void
24320 Inst_VOP3__V_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
24321 {
24322 Wavefront *wf = gpuDynInst->wavefront();
24323 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24324 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24325 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24326 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
24327
24328 src0.readSrc();
24329 src1.readSrc();
24330
24331 /**
24332 * input modifiers are supported by FP operations only
24333 */
24334 assert(!(extData.NEG & 0x1));
24335 assert(!(extData.NEG & 0x2));
24336 assert(!(extData.NEG & 0x4));
24337
24338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24339 if (wf->execMask(lane)) {
24340 vdst[lane] = src0[lane] - src1[lane];
24341 vcc.setBit(lane, src1[lane] > src0[lane] ? 1 : 0);
24342 }
24343 }
24344
24345 vdst.write();
24346 vcc.write();
24347 }
24348
24349 Inst_VOP3__V_SUBREV_U32::Inst_VOP3__V_SUBREV_U32(
24350 InFmt_VOP3_SDST_ENC *iFmt)
24351 : Inst_VOP3_SDST_ENC(iFmt, "v_subrev_u32")
24352 {
24353 setFlag(ALU);
24354 setFlag(WritesVCC);
24355 } // Inst_VOP3__V_SUBREV_U32
24356
24357 Inst_VOP3__V_SUBREV_U32::~Inst_VOP3__V_SUBREV_U32()
24358 {
24359 } // ~Inst_VOP3__V_SUBREV_U32
24360
24361 // D.u = S1.u - S0.u;
24362 // VCC[threadId] = (S0.u > S1.u ? 1 : 0) is an UNSIGNED overflow or
24363 // carry-out.
24364 // In VOP3 the VCC destination may be an arbitrary SGPR-pair.
24365 void
24366 Inst_VOP3__V_SUBREV_U32::execute(GPUDynInstPtr gpuDynInst)
24367 {
24368 Wavefront *wf = gpuDynInst->wavefront();
24369 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24370 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24371 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24372 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
24373
24374 src0.readSrc();
24375 src1.readSrc();
24376
24377 /**
24378 * input modifiers are supported by FP operations only
24379 */
24380 assert(!(extData.NEG & 0x1));
24381 assert(!(extData.NEG & 0x2));
24382 assert(!(extData.NEG & 0x4));
24383
24384 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24385 if (wf->execMask(lane)) {
24386 vdst[lane] = src1[lane] - src0[lane];
24387 vcc.setBit(lane, src0[lane] > src1[lane] ? 1 : 0);
24388 }
24389 }
24390
24391 vdst.write();
24392 vcc.write();
24393 }
24394
24395 Inst_VOP3__V_ADDC_U32::Inst_VOP3__V_ADDC_U32(InFmt_VOP3_SDST_ENC *iFmt)
24396 : Inst_VOP3_SDST_ENC(iFmt, "v_addc_u32")
24397 {
24398 setFlag(ALU);
24399 setFlag(WritesVCC);
24400 setFlag(ReadsVCC);
24401 } // Inst_VOP3__V_ADDC_U32
24402
24403 Inst_VOP3__V_ADDC_U32::~Inst_VOP3__V_ADDC_U32()
24404 {
24405 } // ~Inst_VOP3__V_ADDC_U32
24406
24407 // D.u = S0.u + S1.u + VCC[threadId];
24408 // VCC[threadId] = (S0.u + S1.u + VCC[threadId] >= 0x100000000ULL ? 1 : 0)
24409 // is an UNSIGNED overflow.
24410 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
24411 // source comes from the SGPR-pair at S2.u.
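    // v_addc_u32 forms the upper half of a multi-precision add: the low
    // words are added with v_add_u32 (carry-out written to an SGPR pair)
    // and each higher word is added with v_addc_u32 consuming that pair
    // through SRC2 (illustrative description of the idiom).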
24412 void
24413 Inst_VOP3__V_ADDC_U32::execute(GPUDynInstPtr gpuDynInst)
24414 {
24415 Wavefront *wf = gpuDynInst->wavefront();
24416 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24417 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24418 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
24419 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24420 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
24421
24422 src0.readSrc();
24423 src1.readSrc();
24424 vcc.read();
24425
24426 /**
24427 * input modifiers are supported by FP operations only
24428 */
24429 assert(!(extData.NEG & 0x1));
24430 assert(!(extData.NEG & 0x2));
24431 assert(!(extData.NEG & 0x4));
24432
24433 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24434 if (wf->execMask(lane)) {
24435 vdst[lane] = src0[lane] + src1[lane]
24436 + bits(vcc.rawData(), lane);
24437 sdst.setBit(lane, ((VecElemU64)src0[lane]
24438 + (VecElemU64)src1[lane]
24439 + (VecElemU64)bits(vcc.rawData(), lane))
                    >= 0x100000000ULL ? 1 : 0);
24441 }
24442 }
24443
24444 vdst.write();
24445 sdst.write();
24446 }
24447
24448 Inst_VOP3__V_SUBB_U32::Inst_VOP3__V_SUBB_U32(InFmt_VOP3_SDST_ENC *iFmt)
24449 : Inst_VOP3_SDST_ENC(iFmt, "v_subb_u32")
24450 {
24451 setFlag(ALU);
24452 setFlag(WritesVCC);
24453 setFlag(ReadsVCC);
24454 } // Inst_VOP3__V_SUBB_U32
24455
24456 Inst_VOP3__V_SUBB_U32::~Inst_VOP3__V_SUBB_U32()
24457 {
24458 } // ~Inst_VOP3__V_SUBB_U32
24459
24460 // D.u = S0.u - S1.u - VCC[threadId];
24461 // VCC[threadId] = (S1.u + VCC[threadId] > S0.u ? 1 : 0) is an UNSIGNED
24462 // overflow.
24463 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
24464 // source comes from the SGPR-pair at S2.u.
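    // Borrow chains mirror the carry chains above: v_sub_u32 on the low
    // words writes the borrow bit that v_subb_u32 consumes through SRC2
    // when subtracting the higher words (illustrative description).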
24465 void
24466 Inst_VOP3__V_SUBB_U32::execute(GPUDynInstPtr gpuDynInst)
24467 {
24468 Wavefront *wf = gpuDynInst->wavefront();
24469 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
24470 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24471 ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
24472 ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
24473 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24474
24475 src0.readSrc();
24476 src1.readSrc();
24477 vcc.read();
24478
24479 /**
24480 * input modifiers are supported by FP operations only
24481 */
24482 assert(!(extData.NEG & 0x1));
24483 assert(!(extData.NEG & 0x2));
24484 assert(!(extData.NEG & 0x4));
24485
24486 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24487 if (wf->execMask(lane)) {
24488 vdst[lane] = src0[lane] - src1[lane]
24489 - bits(vcc.rawData(), lane);
24490 sdst.setBit(lane, (src1[lane] + bits(vcc.rawData(), lane))
24491 > src0[lane] ? 1 : 0);
24492 }
24493 }
24494
24495 vdst.write();
24496 sdst.write();
24497 }
24498
24499 Inst_VOP3__V_SUBBREV_U32::Inst_VOP3__V_SUBBREV_U32(
24500 InFmt_VOP3_SDST_ENC *iFmt)
24501 : Inst_VOP3_SDST_ENC(iFmt, "v_subbrev_u32")
24502 {
24503 setFlag(ALU);
24504 setFlag(WritesVCC);
24505 setFlag(ReadsVCC);
24506 } // Inst_VOP3__V_SUBBREV_U32
24507
24508 Inst_VOP3__V_SUBBREV_U32::~Inst_VOP3__V_SUBBREV_U32()
24509 {
24510 } // ~Inst_VOP3__V_SUBBREV_U32
24511
24512 // D.u = S1.u - S0.u - VCC[threadId];
    // VCC[threadId] = (S0.u + VCC[threadId] > S1.u ? 1 : 0) is an UNSIGNED
    // overflow.
24515 // In VOP3 the VCC destination may be an arbitrary SGPR-pair, and the VCC
24516 // source comes from the SGPR-pair at S2.u.
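    // Relative to v_subb only the operand order is reversed, so the
    // borrow-out test compares S0.u + VCC[threadId] against the minuend
    // S1.u.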
24517 void
24518 Inst_VOP3__V_SUBBREV_U32::execute(GPUDynInstPtr gpuDynInst)
24519 {
24520 Wavefront *wf = gpuDynInst->wavefront();
24521 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
24522 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
        ConstScalarOperandU64 vcc(gpuDynInst, extData.SRC2);
        ScalarOperandU64 sdst(gpuDynInst, instData.SDST);
24525 VecOperandU32 vdst(gpuDynInst, instData.VDST);
24526
24527 src0.readSrc();
24528 src1.readSrc();
24529 vcc.read();
24530
24531 /**
24532 * input modifiers are supported by FP operations only
24533 */
24534 assert(!(extData.NEG & 0x1));
24535 assert(!(extData.NEG & 0x2));
24536 assert(!(extData.NEG & 0x4));
24537
24538 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24539 if (wf->execMask(lane)) {
24540 vdst[lane] = src1[lane] - src0[lane]
24541 - bits(vcc.rawData(), lane);
                sdst.setBit(lane, (src0[lane] + bits(vcc.rawData(), lane))
                    > src1[lane] ? 1 : 0);
24544 }
24545 }
24546
24547 vdst.write();
24548 sdst.write();
24549 }
24550
24551 Inst_VOP3__V_ADD_F16::Inst_VOP3__V_ADD_F16(InFmt_VOP3 *iFmt)
24552 : Inst_VOP3(iFmt, "v_add_f16", false)
24553 {
24554 setFlag(ALU);
24555 setFlag(F16);
24556 } // Inst_VOP3__V_ADD_F16
24557
24558 Inst_VOP3__V_ADD_F16::~Inst_VOP3__V_ADD_F16()
24559 {
24560 } // ~Inst_VOP3__V_ADD_F16
24561
24562 // D.f16 = S0.f16 + S1.f16.
24563 void
24564 Inst_VOP3__V_ADD_F16::execute(GPUDynInstPtr gpuDynInst)
24565 {
24566 panicUnimplemented();
24567 }
24568
24569 Inst_VOP3__V_SUB_F16::Inst_VOP3__V_SUB_F16(InFmt_VOP3 *iFmt)
24570 : Inst_VOP3(iFmt, "v_sub_f16", false)
24571 {
24572 setFlag(ALU);
24573 setFlag(F16);
24574 } // Inst_VOP3__V_SUB_F16
24575
24576 Inst_VOP3__V_SUB_F16::~Inst_VOP3__V_SUB_F16()
24577 {
24578 } // ~Inst_VOP3__V_SUB_F16
24579
24580 // D.f16 = S0.f16 - S1.f16.
24581 void
24582 Inst_VOP3__V_SUB_F16::execute(GPUDynInstPtr gpuDynInst)
24583 {
24584 panicUnimplemented();
24585 }
24586
24587 Inst_VOP3__V_SUBREV_F16::Inst_VOP3__V_SUBREV_F16(InFmt_VOP3 *iFmt)
24588 : Inst_VOP3(iFmt, "v_subrev_f16", false)
24589 {
24590 setFlag(ALU);
24591 setFlag(F16);
24592 } // Inst_VOP3__V_SUBREV_F16
24593
24594 Inst_VOP3__V_SUBREV_F16::~Inst_VOP3__V_SUBREV_F16()
24595 {
24596 } // ~Inst_VOP3__V_SUBREV_F16
24597
24598 // D.f16 = S1.f16 - S0.f16.
24599 void
24600 Inst_VOP3__V_SUBREV_F16::execute(GPUDynInstPtr gpuDynInst)
24601 {
24602 panicUnimplemented();
24603 }
24604
24605 Inst_VOP3__V_MUL_F16::Inst_VOP3__V_MUL_F16(InFmt_VOP3 *iFmt)
24606 : Inst_VOP3(iFmt, "v_mul_f16", false)
24607 {
24608 setFlag(ALU);
24609 setFlag(F16);
24610 } // Inst_VOP3__V_MUL_F16
24611
24612 Inst_VOP3__V_MUL_F16::~Inst_VOP3__V_MUL_F16()
24613 {
24614 } // ~Inst_VOP3__V_MUL_F16
24615
24616 // D.f16 = S0.f16 * S1.f16.
24617 void
24618 Inst_VOP3__V_MUL_F16::execute(GPUDynInstPtr gpuDynInst)
24619 {
24620 panicUnimplemented();
24621 }
24622
24623 Inst_VOP3__V_MAC_F16::Inst_VOP3__V_MAC_F16(InFmt_VOP3 *iFmt)
24624 : Inst_VOP3(iFmt, "v_mac_f16", false)
24625 {
24626 setFlag(ALU);
24627 setFlag(F16);
24628 setFlag(MAC);
24629 } // Inst_VOP3__V_MAC_F16
24630
24631 Inst_VOP3__V_MAC_F16::~Inst_VOP3__V_MAC_F16()
24632 {
24633 } // ~Inst_VOP3__V_MAC_F16
24634
24635 // D.f16 = S0.f16 * S1.f16 + D.f16.
24636 void
24637 Inst_VOP3__V_MAC_F16::execute(GPUDynInstPtr gpuDynInst)
24638 {
24639 panicUnimplemented();
24640 }
24641
24642 Inst_VOP3__V_ADD_U16::Inst_VOP3__V_ADD_U16(InFmt_VOP3 *iFmt)
24643 : Inst_VOP3(iFmt, "v_add_u16", false)
24644 {
24645 setFlag(ALU);
24646 } // Inst_VOP3__V_ADD_U16
24647
24648 Inst_VOP3__V_ADD_U16::~Inst_VOP3__V_ADD_U16()
24649 {
24650 } // ~Inst_VOP3__V_ADD_U16
24651
24652 // D.u16 = S0.u16 + S1.u16.
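    // The 16-bit result wraps modulo 2^16 with no carry-out:
    // e.g. 0xFFFF + 0x0001 = 0x0000 (illustrative).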
24653 void
24654 Inst_VOP3__V_ADD_U16::execute(GPUDynInstPtr gpuDynInst)
24655 {
24656 Wavefront *wf = gpuDynInst->wavefront();
24657 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24658 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24659 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24660
24661 src0.readSrc();
24662 src1.readSrc();
24663
24664 /**
24665 * input modifiers are supported by FP operations only
24666 */
24667 assert(!(instData.ABS & 0x1));
24668 assert(!(instData.ABS & 0x2));
24669 assert(!(instData.ABS & 0x4));
24670 assert(!(extData.NEG & 0x1));
24671 assert(!(extData.NEG & 0x2));
24672 assert(!(extData.NEG & 0x4));
24673
24674 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24675 if (wf->execMask(lane)) {
24676 vdst[lane] = src0[lane] + src1[lane];
24677 }
24678 }
24679
24680 vdst.write();
24681 }
24682
24683 Inst_VOP3__V_SUB_U16::Inst_VOP3__V_SUB_U16(InFmt_VOP3 *iFmt)
24684 : Inst_VOP3(iFmt, "v_sub_u16", false)
24685 {
24686 setFlag(ALU);
24687 } // Inst_VOP3__V_SUB_U16
24688
24689 Inst_VOP3__V_SUB_U16::~Inst_VOP3__V_SUB_U16()
24690 {
24691 } // ~Inst_VOP3__V_SUB_U16
24692
24693 // D.u16 = S0.u16 - S1.u16.
24694 void
24695 Inst_VOP3__V_SUB_U16::execute(GPUDynInstPtr gpuDynInst)
24696 {
24697 Wavefront *wf = gpuDynInst->wavefront();
24698 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24699 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24700 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24701
24702 src0.readSrc();
24703 src1.readSrc();
24704
24705 /**
24706 * input modifiers are supported by FP operations only
24707 */
24708 assert(!(instData.ABS & 0x1));
24709 assert(!(instData.ABS & 0x2));
24710 assert(!(instData.ABS & 0x4));
24711 assert(!(extData.NEG & 0x1));
24712 assert(!(extData.NEG & 0x2));
24713 assert(!(extData.NEG & 0x4));
24714
24715 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24716 if (wf->execMask(lane)) {
24717 vdst[lane] = src0[lane] - src1[lane];
24718 }
24719 }
24720
24721 vdst.write();
24722 }
24723
24724 Inst_VOP3__V_SUBREV_U16::Inst_VOP3__V_SUBREV_U16(InFmt_VOP3 *iFmt)
24725 : Inst_VOP3(iFmt, "v_subrev_u16", false)
24726 {
24727 setFlag(ALU);
24728 } // Inst_VOP3__V_SUBREV_U16
24729
24730 Inst_VOP3__V_SUBREV_U16::~Inst_VOP3__V_SUBREV_U16()
24731 {
24732 } // ~Inst_VOP3__V_SUBREV_U16
24733
24734 // D.u16 = S1.u16 - S0.u16.
24735 void
24736 Inst_VOP3__V_SUBREV_U16::execute(GPUDynInstPtr gpuDynInst)
24737 {
24738 Wavefront *wf = gpuDynInst->wavefront();
24739 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24740 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24741 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24742
24743 src0.readSrc();
24744 src1.readSrc();
24745
24746 /**
24747 * input modifiers are supported by FP operations only
24748 */
24749 assert(!(instData.ABS & 0x1));
24750 assert(!(instData.ABS & 0x2));
24751 assert(!(instData.ABS & 0x4));
24752 assert(!(extData.NEG & 0x1));
24753 assert(!(extData.NEG & 0x2));
24754 assert(!(extData.NEG & 0x4));
24755
24756 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24757 if (wf->execMask(lane)) {
24758 vdst[lane] = src1[lane] - src0[lane];
24759 }
24760 }
24761
24762 vdst.write();
24763 }
24764
24765 Inst_VOP3__V_MUL_LO_U16::Inst_VOP3__V_MUL_LO_U16(InFmt_VOP3 *iFmt)
24766 : Inst_VOP3(iFmt, "v_mul_lo_u16", false)
24767 {
24768 setFlag(ALU);
24769 } // Inst_VOP3__V_MUL_LO_U16
24770
24771 Inst_VOP3__V_MUL_LO_U16::~Inst_VOP3__V_MUL_LO_U16()
24772 {
24773 } // ~Inst_VOP3__V_MUL_LO_U16
24774
24775 // D.u16 = S0.u16 * S1.u16.
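    // Only the low 16 bits of the product survive: the uint16_t operands
    // promote to int for the multiply, and assigning back to the 16-bit
    // destination truncates, e.g. 0x0100 * 0x0300 = 0x30000 -> 0x0000
    // (illustrative; the intermediate product is evaluated at int width).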
24776 void
24777 Inst_VOP3__V_MUL_LO_U16::execute(GPUDynInstPtr gpuDynInst)
24778 {
24779 Wavefront *wf = gpuDynInst->wavefront();
24780 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24781 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24782 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24783
24784 src0.readSrc();
24785 src1.readSrc();
24786
24787 /**
24788 * input modifiers are supported by FP operations only
24789 */
24790 assert(!(instData.ABS & 0x1));
24791 assert(!(instData.ABS & 0x2));
24792 assert(!(instData.ABS & 0x4));
24793 assert(!(extData.NEG & 0x1));
24794 assert(!(extData.NEG & 0x2));
24795 assert(!(extData.NEG & 0x4));
24796
24797 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24798 if (wf->execMask(lane)) {
24799 vdst[lane] = src0[lane] * src1[lane];
24800 }
24801 }
24802
24803 vdst.write();
24804 }
24805
24806 Inst_VOP3__V_LSHLREV_B16::Inst_VOP3__V_LSHLREV_B16(InFmt_VOP3 *iFmt)
24807 : Inst_VOP3(iFmt, "v_lshlrev_b16", false)
24808 {
24809 setFlag(ALU);
24810 } // Inst_VOP3__V_LSHLREV_B16
24811
24812 Inst_VOP3__V_LSHLREV_B16::~Inst_VOP3__V_LSHLREV_B16()
24813 {
24814 } // ~Inst_VOP3__V_LSHLREV_B16
24815
24816 // D.u[15:0] = S1.u[15:0] << S0.u[3:0].
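    // The shift amount is masked to 4 bits (S0[3:0]) for the 16-bit
    // shifts, versus 5 bits (S0[4:0]) for the 32-bit variants above.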
24817 void
24818 Inst_VOP3__V_LSHLREV_B16::execute(GPUDynInstPtr gpuDynInst)
24819 {
24820 Wavefront *wf = gpuDynInst->wavefront();
24821 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24822 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24823 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24824
24825 src0.readSrc();
24826 src1.readSrc();
24827
24828 /**
24829 * input modifiers are supported by FP operations only
24830 */
24831 assert(!(instData.ABS & 0x1));
24832 assert(!(instData.ABS & 0x2));
24833 assert(!(instData.ABS & 0x4));
24834 assert(!(extData.NEG & 0x1));
24835 assert(!(extData.NEG & 0x2));
24836 assert(!(extData.NEG & 0x4));
24837
24838 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24839 if (wf->execMask(lane)) {
24840 vdst[lane] = src1[lane] << bits(src0[lane], 3, 0);
24841 }
24842 }
24843
24844 vdst.write();
24845 }
24846
24847 Inst_VOP3__V_LSHRREV_B16::Inst_VOP3__V_LSHRREV_B16(InFmt_VOP3 *iFmt)
24848 : Inst_VOP3(iFmt, "v_lshrrev_b16", false)
24849 {
24850 setFlag(ALU);
24851 } // Inst_VOP3__V_LSHRREV_B16
24852
24853 Inst_VOP3__V_LSHRREV_B16::~Inst_VOP3__V_LSHRREV_B16()
24854 {
24855 } // ~Inst_VOP3__V_LSHRREV_B16
24856
24857 // D.u[15:0] = S1.u[15:0] >> S0.u[3:0].
24858 // The vacated bits are set to zero.
24859 void
24860 Inst_VOP3__V_LSHRREV_B16::execute(GPUDynInstPtr gpuDynInst)
24861 {
24862 Wavefront *wf = gpuDynInst->wavefront();
24863 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24864 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24865 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24866
24867 src0.readSrc();
24868 src1.readSrc();
24869
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
24885
24886 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24887 if (wf->execMask(lane)) {
24888 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
24889 }
24890 }
24891
24892 vdst.write();
24893 }
24894
24895 Inst_VOP3__V_ASHRREV_I16::Inst_VOP3__V_ASHRREV_I16(InFmt_VOP3 *iFmt)
24896 : Inst_VOP3(iFmt, "v_ashrrev_i16", false)
24897 {
24898 setFlag(ALU);
24899 } // Inst_VOP3__V_ASHRREV_I16
24900
24901 Inst_VOP3__V_ASHRREV_I16::~Inst_VOP3__V_ASHRREV_I16()
24902 {
24903 } // ~Inst_VOP3__V_ASHRREV_I16
24904
24905 // D.i[15:0] = signext(S1.i[15:0]) >> S0.i[3:0].
24906 // The vacated bits are set to the sign bit of the input value.
24907 void
24908 Inst_VOP3__V_ASHRREV_I16::execute(GPUDynInstPtr gpuDynInst)
24909 {
24910 Wavefront *wf = gpuDynInst->wavefront();
24911 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24912 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
24913 VecOperandI16 vdst(gpuDynInst, instData.VDST);
24914
24915 src0.readSrc();
24916 src1.readSrc();
24917
24918 /**
24919 * input modifiers are supported by FP operations only
24920 */
24921 assert(!(instData.ABS & 0x1));
24922 assert(!(instData.ABS & 0x2));
24923 assert(!(instData.ABS & 0x4));
24924 assert(!(extData.NEG & 0x1));
24925 assert(!(extData.NEG & 0x2));
24926 assert(!(extData.NEG & 0x4));
24927
24928 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
24929 if (wf->execMask(lane)) {
24930 vdst[lane] = src1[lane] >> bits(src0[lane], 3, 0);
24931 }
24932 }
24933
24934 vdst.write();
24935 }
24936
24937 Inst_VOP3__V_MAX_F16::Inst_VOP3__V_MAX_F16(InFmt_VOP3 *iFmt)
24938 : Inst_VOP3(iFmt, "v_max_f16", false)
24939 {
24940 setFlag(ALU);
24941 setFlag(F16);
24942 } // Inst_VOP3__V_MAX_F16
24943
24944 Inst_VOP3__V_MAX_F16::~Inst_VOP3__V_MAX_F16()
24945 {
24946 } // ~Inst_VOP3__V_MAX_F16
24947
24948 // D.f16 = max(S0.f16, S1.f16).
24949 void
24950 Inst_VOP3__V_MAX_F16::execute(GPUDynInstPtr gpuDynInst)
24951 {
24952 panicUnimplemented();
24953 }
24954
24955 Inst_VOP3__V_MIN_F16::Inst_VOP3__V_MIN_F16(InFmt_VOP3 *iFmt)
24956 : Inst_VOP3(iFmt, "v_min_f16", false)
24957 {
24958 setFlag(ALU);
24959 setFlag(F16);
24960 } // Inst_VOP3__V_MIN_F16
24961
24962 Inst_VOP3__V_MIN_F16::~Inst_VOP3__V_MIN_F16()
24963 {
24964 } // ~Inst_VOP3__V_MIN_F16
24965
24966 // D.f16 = min(S0.f16, S1.f16).
24967 void
24968 Inst_VOP3__V_MIN_F16::execute(GPUDynInstPtr gpuDynInst)
24969 {
24970 panicUnimplemented();
24971 }
24972
24973 Inst_VOP3__V_MAX_U16::Inst_VOP3__V_MAX_U16(InFmt_VOP3 *iFmt)
24974 : Inst_VOP3(iFmt, "v_max_u16", false)
24975 {
24976 setFlag(ALU);
24977 } // Inst_VOP3__V_MAX_U16
24978
24979 Inst_VOP3__V_MAX_U16::~Inst_VOP3__V_MAX_U16()
24980 {
24981 } // ~Inst_VOP3__V_MAX_U16
24982
24983 // D.u[15:0] = max(S0.u[15:0], S1.u[15:0]).
24984 void
24985 Inst_VOP3__V_MAX_U16::execute(GPUDynInstPtr gpuDynInst)
24986 {
24987 Wavefront *wf = gpuDynInst->wavefront();
24988 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
24989 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
24990 VecOperandU16 vdst(gpuDynInst, instData.VDST);
24991
24992 src0.readSrc();
24993 src1.readSrc();
24994
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25010
25011 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25012 if (wf->execMask(lane)) {
25013 vdst[lane] = std::max(src0[lane], src1[lane]);
25014 }
25015 }
25016
25017 vdst.write();
25018 }
25019
25020 Inst_VOP3__V_MAX_I16::Inst_VOP3__V_MAX_I16(InFmt_VOP3 *iFmt)
25021 : Inst_VOP3(iFmt, "v_max_i16", false)
25022 {
25023 setFlag(ALU);
25024 } // Inst_VOP3__V_MAX_I16
25025
25026 Inst_VOP3__V_MAX_I16::~Inst_VOP3__V_MAX_I16()
25027 {
25028 } // ~Inst_VOP3__V_MAX_I16
25029
25030 // D.i[15:0] = max(S0.i[15:0], S1.i[15:0]).
25031 void
25032 Inst_VOP3__V_MAX_I16::execute(GPUDynInstPtr gpuDynInst)
25033 {
25034 Wavefront *wf = gpuDynInst->wavefront();
25035 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
25036 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
25037 VecOperandI16 vdst(gpuDynInst, instData.VDST);
25038
25039 src0.readSrc();
25040 src1.readSrc();
25041
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25057
25058 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25059 if (wf->execMask(lane)) {
25060 vdst[lane] = std::max(src0[lane], src1[lane]);
25061 }
25062 }
25063
25064 vdst.write();
25065 }
25066
25067 Inst_VOP3__V_MIN_U16::Inst_VOP3__V_MIN_U16(InFmt_VOP3 *iFmt)
25068 : Inst_VOP3(iFmt, "v_min_u16", false)
25069 {
25070 setFlag(ALU);
25071 } // Inst_VOP3__V_MIN_U16
25072
25073 Inst_VOP3__V_MIN_U16::~Inst_VOP3__V_MIN_U16()
25074 {
25075 } // ~Inst_VOP3__V_MIN_U16
25076
25077 // D.u[15:0] = min(S0.u[15:0], S1.u[15:0]).
25078 void
25079 Inst_VOP3__V_MIN_U16::execute(GPUDynInstPtr gpuDynInst)
25080 {
25081 Wavefront *wf = gpuDynInst->wavefront();
25082 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
25083 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
25084 VecOperandU16 vdst(gpuDynInst, instData.VDST);
25085
25086 src0.readSrc();
25087 src1.readSrc();
25088
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25104
25105 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25106 if (wf->execMask(lane)) {
25107 vdst[lane] = std::min(src0[lane], src1[lane]);
25108 }
25109 }
25110
25111 vdst.write();
25112 }
25113
25114 Inst_VOP3__V_MIN_I16::Inst_VOP3__V_MIN_I16(InFmt_VOP3 *iFmt)
25115 : Inst_VOP3(iFmt, "v_min_i16", false)
25116 {
25117 setFlag(ALU);
25118 } // Inst_VOP3__V_MIN_I16
25119
25120 Inst_VOP3__V_MIN_I16::~Inst_VOP3__V_MIN_I16()
25121 {
25122 } // ~Inst_VOP3__V_MIN_I16
25123
25124 // D.i[15:0] = min(S0.i[15:0], S1.i[15:0]).
25125 void
25126 Inst_VOP3__V_MIN_I16::execute(GPUDynInstPtr gpuDynInst)
25127 {
25128 Wavefront *wf = gpuDynInst->wavefront();
25129 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
25130 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
25131 VecOperandI16 vdst(gpuDynInst, instData.VDST);
25132
25133 src0.readSrc();
25134 src1.readSrc();
25135
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25151
25152 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25153 if (wf->execMask(lane)) {
25154 vdst[lane] = std::min(src0[lane], src1[lane]);
25155 }
25156 }
25157
25158 vdst.write();
25159 }
25160
25161 Inst_VOP3__V_LDEXP_F16::Inst_VOP3__V_LDEXP_F16(InFmt_VOP3 *iFmt)
25162 : Inst_VOP3(iFmt, "v_ldexp_f16", false)
25163 {
25164 setFlag(ALU);
25165 setFlag(F16);
25166 } // Inst_VOP3__V_LDEXP_F16
25167
25168 Inst_VOP3__V_LDEXP_F16::~Inst_VOP3__V_LDEXP_F16()
25169 {
25170 } // ~Inst_VOP3__V_LDEXP_F16
25171
25172 // D.f16 = S0.f16 * (2 ** S1.i16).
25173 void
25174 Inst_VOP3__V_LDEXP_F16::execute(GPUDynInstPtr gpuDynInst)
25175 {
25176 panicUnimplemented();
25177 }
25178
25179 Inst_VOP3__V_NOP::Inst_VOP3__V_NOP(InFmt_VOP3 *iFmt)
25180 : Inst_VOP3(iFmt, "v_nop", false)
25181 {
25182 setFlag(Nop);
25183 setFlag(ALU);
25184 } // Inst_VOP3__V_NOP
25185
25186 Inst_VOP3__V_NOP::~Inst_VOP3__V_NOP()
25187 {
25188 } // ~Inst_VOP3__V_NOP
25189
25190 // Do nothing.
25191 void
25192 Inst_VOP3__V_NOP::execute(GPUDynInstPtr gpuDynInst)
25193 {
25194 }
25195
25196 Inst_VOP3__V_MOV_B32::Inst_VOP3__V_MOV_B32(InFmt_VOP3 *iFmt)
25197 : Inst_VOP3(iFmt, "v_mov_b32", false)
25198 {
25199 setFlag(ALU);
25200 } // Inst_VOP3__V_MOV_B32
25201
25202 Inst_VOP3__V_MOV_B32::~Inst_VOP3__V_MOV_B32()
25203 {
25204 } // ~Inst_VOP3__V_MOV_B32
25205
25206 // D.u = S0.u.
25207 // Input and output modifiers not supported; this is an untyped operation.
25208 void
25209 Inst_VOP3__V_MOV_B32::execute(GPUDynInstPtr gpuDynInst)
25210 {
25211 Wavefront *wf = gpuDynInst->wavefront();
25212 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25213 VecOperandU32 vdst(gpuDynInst, instData.VDST);
25214
25215 src.readSrc();
25216
25217 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25218 if (wf->execMask(lane)) {
25219 vdst[lane] = src[lane];
25220 }
25221 }
25222
25223 vdst.write();
25224 }
25225
25226 Inst_VOP3__V_CVT_I32_F64::Inst_VOP3__V_CVT_I32_F64(InFmt_VOP3 *iFmt)
25227 : Inst_VOP3(iFmt, "v_cvt_i32_f64", false)
25228 {
25229 setFlag(ALU);
25230 setFlag(F64);
25231 } // Inst_VOP3__V_CVT_I32_F64
25232
25233 Inst_VOP3__V_CVT_I32_F64::~Inst_VOP3__V_CVT_I32_F64()
25234 {
25235 } // ~Inst_VOP3__V_CVT_I32_F64
25236
25237 // D.i = (int)S0.d.
25238 // Out-of-range floating point values (including infinity) saturate. NaN
25239 // is converted to 0.
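    // The saturation test below relies on std::frexp returning exp such
    // that |S0.d| lies in [2^(exp-1), 2^exp); exp > 31 therefore means
    // |S0.d| >= 2^31, i.e. the value cannot be represented in a signed
    // 32-bit integer and must clamp to INT_MIN/INT_MAX.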
25240 void
25241 Inst_VOP3__V_CVT_I32_F64::execute(GPUDynInstPtr gpuDynInst)
25242 {
25243 Wavefront *wf = gpuDynInst->wavefront();
25244 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
25245 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25246
25247 src.readSrc();
25248
25249 if (instData.ABS & 0x1) {
25250 src.absModifier();
25251 }
25252
25253 if (extData.NEG & 0x1) {
25254 src.negModifier();
25255 }
25256
25257 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25258 if (wf->execMask(lane)) {
25259 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 31) {
25264 if (std::signbit(src[lane])) {
25265 vdst[lane] = INT_MIN;
25266 } else {
25267 vdst[lane] = INT_MAX;
25268 }
25269 } else {
25270 vdst[lane] = (VecElemI32)src[lane];
25271 }
25272 }
25273 }
25274
25275 vdst.write();
25276 }
25277
25278 Inst_VOP3__V_CVT_F64_I32::Inst_VOP3__V_CVT_F64_I32(InFmt_VOP3 *iFmt)
25279 : Inst_VOP3(iFmt, "v_cvt_f64_i32", false)
25280 {
25281 setFlag(ALU);
25282 setFlag(F64);
25283 } // Inst_VOP3__V_CVT_F64_I32
25284
25285 Inst_VOP3__V_CVT_F64_I32::~Inst_VOP3__V_CVT_F64_I32()
25286 {
25287 } // ~Inst_VOP3__V_CVT_F64_I32
25288
25289 // D.d = (double)S0.i.
25290 void
25291 Inst_VOP3__V_CVT_F64_I32::execute(GPUDynInstPtr gpuDynInst)
25292 {
25293 Wavefront *wf = gpuDynInst->wavefront();
25294 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
25295 VecOperandF64 vdst(gpuDynInst, instData.VDST);
25296
25297 src.readSrc();
25298
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25306
25307 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25308 if (wf->execMask(lane)) {
25309 vdst[lane] = (VecElemF64)src[lane];
25310 }
25311 }
25312
25313 vdst.write();
25314 }
25315
25316 Inst_VOP3__V_CVT_F32_I32::Inst_VOP3__V_CVT_F32_I32(InFmt_VOP3 *iFmt)
25317 : Inst_VOP3(iFmt, "v_cvt_f32_i32", false)
25318 {
25319 setFlag(ALU);
25320 setFlag(F32);
25321 } // Inst_VOP3__V_CVT_F32_I32
25322
25323 Inst_VOP3__V_CVT_F32_I32::~Inst_VOP3__V_CVT_F32_I32()
25324 {
25325 } // ~Inst_VOP3__V_CVT_F32_I32
25326
25327 // D.f = (float)S0.i.
25328 void
25329 Inst_VOP3__V_CVT_F32_I32::execute(GPUDynInstPtr gpuDynInst)
25330 {
25331 Wavefront *wf = gpuDynInst->wavefront();
        ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
25333 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25334
25335 src.readSrc();
25336
25337 /**
25338 * input modifiers are supported by FP operations only
25339 */
25340 assert(!(instData.ABS & 0x1));
25341 assert(!(instData.ABS & 0x2));
25342 assert(!(instData.ABS & 0x4));
25343 assert(!(extData.NEG & 0x1));
25344 assert(!(extData.NEG & 0x2));
25345 assert(!(extData.NEG & 0x4));
25346
25347 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25348 if (wf->execMask(lane)) {
25349 vdst[lane] = (VecElemF32)src[lane];
25350 }
25351 }
25352
25353 vdst.write();
25354 }
25355
25356 Inst_VOP3__V_CVT_F32_U32::Inst_VOP3__V_CVT_F32_U32(InFmt_VOP3 *iFmt)
25357 : Inst_VOP3(iFmt, "v_cvt_f32_u32", false)
25358 {
25359 setFlag(ALU);
25360 setFlag(F32);
25361 } // Inst_VOP3__V_CVT_F32_U32
25362
25363 Inst_VOP3__V_CVT_F32_U32::~Inst_VOP3__V_CVT_F32_U32()
25364 {
25365 } // ~Inst_VOP3__V_CVT_F32_U32
25366
25367 // D.f = (float)S0.u.
25368 void
25369 Inst_VOP3__V_CVT_F32_U32::execute(GPUDynInstPtr gpuDynInst)
25370 {
25371 Wavefront *wf = gpuDynInst->wavefront();
25372 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25373 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25374
25375 src.readSrc();
25376
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25384
25385 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25386 if (wf->execMask(lane)) {
25387 vdst[lane] = (VecElemF32)src[lane];
25388 }
25389 }
25390
25391 vdst.write();
25392 }
25393
25394 Inst_VOP3__V_CVT_U32_F32::Inst_VOP3__V_CVT_U32_F32(InFmt_VOP3 *iFmt)
25395 : Inst_VOP3(iFmt, "v_cvt_u32_f32", false)
25396 {
25397 setFlag(ALU);
25398 setFlag(F32);
25399 } // Inst_VOP3__V_CVT_U32_F32
25400
25401 Inst_VOP3__V_CVT_U32_F32::~Inst_VOP3__V_CVT_U32_F32()
25402 {
25403 } // ~Inst_VOP3__V_CVT_U32_F32
25404
25405 // D.u = (unsigned)S0.f.
25406 // Out-of-range floating point values (including infinity) saturate. NaN
25407 // is converted to 0.
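    // Here exp > 32 from std::frexp means S0.f >= 2^32 (beyond UINT_MAX),
    // and any negative finite input clamps to 0 per the saturation rule.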
25408 void
25409 Inst_VOP3__V_CVT_U32_F32::execute(GPUDynInstPtr gpuDynInst)
25410 {
25411 Wavefront *wf = gpuDynInst->wavefront();
25412 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25413 VecOperandU32 vdst(gpuDynInst, instData.VDST);
25414
25415 src.readSrc();
25416
25417 if (instData.ABS & 0x1) {
25418 src.absModifier();
25419 }
25420
25421 if (extData.NEG & 0x1) {
25422 src.negModifier();
25423 }
25424
25425 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25426 if (wf->execMask(lane)) {
25427 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 32) {
                    vdst[lane] = UINT_MAX;
                } else if (src[lane] < 0) {
                    vdst[lane] = 0;
                } else {
25440 vdst[lane] = (VecElemU32)src[lane];
25441 }
25442 }
25443 }
25444
25445 vdst.write();
25446 }
25447
25448 Inst_VOP3__V_CVT_I32_F32::Inst_VOP3__V_CVT_I32_F32(InFmt_VOP3 *iFmt)
25449 : Inst_VOP3(iFmt, "v_cvt_i32_f32", false)
25450 {
25451 setFlag(ALU);
25452 setFlag(F32);
25453 } // Inst_VOP3__V_CVT_I32_F32
25454
25455 Inst_VOP3__V_CVT_I32_F32::~Inst_VOP3__V_CVT_I32_F32()
25456 {
25457 } // ~Inst_VOP3__V_CVT_I32_F32
25458
25459 // D.i = (int)S0.f.
25460 // Out-of-range floating point values (including infinity) saturate. NaN
25461 // is converted to 0.
25462 void
25463 Inst_VOP3__V_CVT_I32_F32::execute(GPUDynInstPtr gpuDynInst)
25464 {
25465 Wavefront *wf = gpuDynInst->wavefront();
25466 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25467 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25468
25469 src.readSrc();
25470
25471 if (instData.ABS & 0x1) {
25472 src.absModifier();
25473 }
25474
25475 if (extData.NEG & 0x1) {
25476 src.negModifier();
25477 }
25478
25479 /**
25480 * input modifiers are supported by FP operations only
25481 */
25482 assert(!(instData.ABS & 0x2));
25483 assert(!(instData.ABS & 0x4));
25484 assert(!(extData.NEG & 0x2));
25485 assert(!(extData.NEG & 0x4));
25486
25487 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25488 if (wf->execMask(lane)) {
25489 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane]) || exp > 31) {
25494 if (std::signbit(src[lane])) {
25495 vdst[lane] = INT_MIN;
25496 } else {
25497 vdst[lane] = INT_MAX;
25498 }
25499 } else {
25500 vdst[lane] = (VecElemI32)src[lane];
25501 }
25502 }
25503 }
25504
25505 vdst.write();
25506 }
25507
25508 Inst_VOP3__V_MOV_FED_B32::Inst_VOP3__V_MOV_FED_B32(InFmt_VOP3 *iFmt)
25509 : Inst_VOP3(iFmt, "v_mov_fed_b32", false)
25510 {
25511 setFlag(ALU);
25512 } // Inst_VOP3__V_MOV_FED_B32
25513
25514 Inst_VOP3__V_MOV_FED_B32::~Inst_VOP3__V_MOV_FED_B32()
25515 {
25516 } // ~Inst_VOP3__V_MOV_FED_B32
25517
25518 // D.u = S0.u;
25519 // Input and output modifiers not supported; this is an untyped operation.
25520 void
25521 Inst_VOP3__V_MOV_FED_B32::execute(GPUDynInstPtr gpuDynInst)
25522 {
25523 panicUnimplemented();
25524 }
25525
25526 Inst_VOP3__V_CVT_F16_F32::Inst_VOP3__V_CVT_F16_F32(InFmt_VOP3 *iFmt)
25527 : Inst_VOP3(iFmt, "v_cvt_f16_f32", false)
25528 {
25529 setFlag(ALU);
25530 setFlag(F32);
25531 } // Inst_VOP3__V_CVT_F16_F32
25532
25533 Inst_VOP3__V_CVT_F16_F32::~Inst_VOP3__V_CVT_F16_F32()
25534 {
25535 } // ~Inst_VOP3__V_CVT_F16_F32
25536
25537 // D.f16 = flt32_to_flt16(S0.f).
25538 void
25539 Inst_VOP3__V_CVT_F16_F32::execute(GPUDynInstPtr gpuDynInst)
25540 {
25541 panicUnimplemented();
25542 }
25543
25544 Inst_VOP3__V_CVT_F32_F16::Inst_VOP3__V_CVT_F32_F16(InFmt_VOP3 *iFmt)
25545 : Inst_VOP3(iFmt, "v_cvt_f32_f16", false)
25546 {
25547 setFlag(ALU);
25548 setFlag(F32);
25549 } // Inst_VOP3__V_CVT_F32_F16
25550
25551 Inst_VOP3__V_CVT_F32_F16::~Inst_VOP3__V_CVT_F32_F16()
25552 {
25553 } // ~Inst_VOP3__V_CVT_F32_F16
25554
25555 // D.f = flt16_to_flt32(S0.f16).
25556 void
25557 Inst_VOP3__V_CVT_F32_F16::execute(GPUDynInstPtr gpuDynInst)
25558 {
25559 panicUnimplemented();
25560 }
25561
25562 Inst_VOP3__V_CVT_RPI_I32_F32::Inst_VOP3__V_CVT_RPI_I32_F32(
25563 InFmt_VOP3 *iFmt)
25564 : Inst_VOP3(iFmt, "v_cvt_rpi_i32_f32", false)
25565 {
25566 setFlag(ALU);
25567 setFlag(F32);
25568 } // Inst_VOP3__V_CVT_RPI_I32_F32
25569
25570 Inst_VOP3__V_CVT_RPI_I32_F32::~Inst_VOP3__V_CVT_RPI_I32_F32()
25571 {
25572 } // ~Inst_VOP3__V_CVT_RPI_I32_F32
25573
25574 // D.i = (int)floor(S0.f + 0.5).
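    // floor(S0.f + 0.5) rounds to nearest with ties toward +infinity:
    // 0.5 -> 1 and -0.5 -> 0 (illustrative).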
25575 void
25576 Inst_VOP3__V_CVT_RPI_I32_F32::execute(GPUDynInstPtr gpuDynInst)
25577 {
25578 Wavefront *wf = gpuDynInst->wavefront();
25579 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25580 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25581
25582 src.readSrc();
25583
25584 if (instData.ABS & 0x1) {
25585 src.absModifier();
25586 }
25587
25588 if (extData.NEG & 0x1) {
25589 src.negModifier();
25590 }
25591
25592 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25593 if (wf->execMask(lane)) {
25594 vdst[lane] = (VecElemI32)std::floor(src[lane] + 0.5);
25595 }
25596 }
25597
25598 vdst.write();
25599 }
25600
25601 Inst_VOP3__V_CVT_FLR_I32_F32::Inst_VOP3__V_CVT_FLR_I32_F32(
25602 InFmt_VOP3 *iFmt)
25603 : Inst_VOP3(iFmt, "v_cvt_flr_i32_f32", false)
25604 {
25605 setFlag(ALU);
25606 setFlag(F32);
25607 } // Inst_VOP3__V_CVT_FLR_I32_F32
25608
25609 Inst_VOP3__V_CVT_FLR_I32_F32::~Inst_VOP3__V_CVT_FLR_I32_F32()
25610 {
25611 } // ~Inst_VOP3__V_CVT_FLR_I32_F32
25612
25613 // D.i = (int)floor(S0.f).
25614 void
25615 Inst_VOP3__V_CVT_FLR_I32_F32::execute(GPUDynInstPtr gpuDynInst)
25616 {
25617 Wavefront *wf = gpuDynInst->wavefront();
25618 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25619 VecOperandI32 vdst(gpuDynInst, instData.VDST);
25620
25621 src.readSrc();
25622
25623 if (instData.ABS & 0x1) {
25624 src.absModifier();
25625 }
25626
25627 if (extData.NEG & 0x1) {
25628 src.negModifier();
25629 }
25630
25631 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25632 if (wf->execMask(lane)) {
25633 vdst[lane] = (VecElemI32)std::floor(src[lane]);
25634 }
25635 }
25636
25637 vdst.write();
25638 }
25639
25640 Inst_VOP3__V_CVT_OFF_F32_I4::Inst_VOP3__V_CVT_OFF_F32_I4(InFmt_VOP3 *iFmt)
25641 : Inst_VOP3(iFmt, "v_cvt_off_f32_i4", false)
25642 {
25643 setFlag(ALU);
25644 setFlag(F32);
25645 } // Inst_VOP3__V_CVT_OFF_F32_I4
25646
25647 Inst_VOP3__V_CVT_OFF_F32_I4::~Inst_VOP3__V_CVT_OFF_F32_I4()
25648 {
25649 } // ~Inst_VOP3__V_CVT_OFF_F32_I4
25650
25651 // 4-bit signed int to 32-bit float.
25652 void
25653 Inst_VOP3__V_CVT_OFF_F32_I4::execute(GPUDynInstPtr gpuDynInst)
25654 {
25655 panicUnimplemented();
25656 }
25657
25658 Inst_VOP3__V_CVT_F32_F64::Inst_VOP3__V_CVT_F32_F64(InFmt_VOP3 *iFmt)
25659 : Inst_VOP3(iFmt, "v_cvt_f32_f64", false)
25660 {
25661 setFlag(ALU);
25662 setFlag(F64);
25663 } // Inst_VOP3__V_CVT_F32_F64
25664
25665 Inst_VOP3__V_CVT_F32_F64::~Inst_VOP3__V_CVT_F32_F64()
25666 {
25667 } // ~Inst_VOP3__V_CVT_F32_F64
25668
25669 // D.f = (float)S0.d.
25670 void
25671 Inst_VOP3__V_CVT_F32_F64::execute(GPUDynInstPtr gpuDynInst)
25672 {
25673 Wavefront *wf = gpuDynInst->wavefront();
25674 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
25675 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25676
25677 src.readSrc();
25678
25679 if (instData.ABS & 0x1) {
25680 src.absModifier();
25681 }
25682
25683 if (extData.NEG & 0x1) {
25684 src.negModifier();
25685 }
25686
25687 /**
25688 * input modifiers are supported by FP operations only
25689 */
25690 assert(!(instData.ABS & 0x2));
25691 assert(!(instData.ABS & 0x4));
25692 assert(!(extData.NEG & 0x2));
25693 assert(!(extData.NEG & 0x4));
25694
25695 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25696 if (wf->execMask(lane)) {
25697 vdst[lane] = (VecElemF32)src[lane];
25698 }
25699 }
25700
25701 vdst.write();
25702 }
25703
25704 Inst_VOP3__V_CVT_F64_F32::Inst_VOP3__V_CVT_F64_F32(InFmt_VOP3 *iFmt)
25705 : Inst_VOP3(iFmt, "v_cvt_f64_f32", false)
25706 {
25707 setFlag(ALU);
25708 setFlag(F64);
25709 } // Inst_VOP3__V_CVT_F64_F32
25710
25711 Inst_VOP3__V_CVT_F64_F32::~Inst_VOP3__V_CVT_F64_F32()
25712 {
25713 } // ~Inst_VOP3__V_CVT_F64_F32
25714
25715 // D.d = (double)S0.f.
25716 void
25717 Inst_VOP3__V_CVT_F64_F32::execute(GPUDynInstPtr gpuDynInst)
25718 {
25719 Wavefront *wf = gpuDynInst->wavefront();
25720 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
25721 VecOperandF64 vdst(gpuDynInst, instData.VDST);
25722
25723 src.readSrc();
25724
25725 if (instData.ABS & 0x1) {
25726 src.absModifier();
25727 }
25728
25729 if (extData.NEG & 0x1) {
25730 src.negModifier();
25731 }
25732
25733 /**
25734 * input modifiers are supported by FP operations only
25735 */
25736 assert(!(instData.ABS & 0x2));
25737 assert(!(instData.ABS & 0x4));
25738 assert(!(extData.NEG & 0x2));
25739 assert(!(extData.NEG & 0x4));
25740
25741 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25742 if (wf->execMask(lane)) {
25743 vdst[lane] = (VecElemF64)src[lane];
25744 }
25745 }
25746
25747 vdst.write();
25748 }
25749
25750 Inst_VOP3__V_CVT_F32_UBYTE0::Inst_VOP3__V_CVT_F32_UBYTE0(InFmt_VOP3 *iFmt)
25751 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte0", false)
25752 {
25753 setFlag(ALU);
25754 setFlag(F32);
25755 } // Inst_VOP3__V_CVT_F32_UBYTE0
25756
25757 Inst_VOP3__V_CVT_F32_UBYTE0::~Inst_VOP3__V_CVT_F32_UBYTE0()
25758 {
25759 } // ~Inst_VOP3__V_CVT_F32_UBYTE0
25760
25761 // D.f = (float)(S0.u[7:0]).
25762 void
25763 Inst_VOP3__V_CVT_F32_UBYTE0::execute(GPUDynInstPtr gpuDynInst)
25764 {
25765 Wavefront *wf = gpuDynInst->wavefront();
25766 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25767 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25768
25769 src.readSrc();
25770
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25778
25779 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25780 if (wf->execMask(lane)) {
25781 vdst[lane] = (VecElemF32)bits(src[lane], 7, 0);
25782 }
25783 }
25784
25785 vdst.write();
25786 }
25787
25788 Inst_VOP3__V_CVT_F32_UBYTE1::Inst_VOP3__V_CVT_F32_UBYTE1(InFmt_VOP3 *iFmt)
25789 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte1", false)
25790 {
25791 setFlag(ALU);
25792 setFlag(F32);
25793 } // Inst_VOP3__V_CVT_F32_UBYTE1
25794
25795 Inst_VOP3__V_CVT_F32_UBYTE1::~Inst_VOP3__V_CVT_F32_UBYTE1()
25796 {
25797 } // ~Inst_VOP3__V_CVT_F32_UBYTE1
25798
25799 // D.f = (float)(S0.u[15:8]).
25800 void
25801 Inst_VOP3__V_CVT_F32_UBYTE1::execute(GPUDynInstPtr gpuDynInst)
25802 {
25803 Wavefront *wf = gpuDynInst->wavefront();
25804 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25805 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25806
25807 src.readSrc();
25808
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25816
25817 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25818 if (wf->execMask(lane)) {
25819 vdst[lane] = (VecElemF32)bits(src[lane], 15, 8);
25820 }
25821 }
25822
25823 vdst.write();
25824 }
25825
25826 Inst_VOP3__V_CVT_F32_UBYTE2::Inst_VOP3__V_CVT_F32_UBYTE2(InFmt_VOP3 *iFmt)
25827 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte2", false)
25828 {
25829 setFlag(ALU);
25830 setFlag(F32);
25831 } // Inst_VOP3__V_CVT_F32_UBYTE2
25832
25833 Inst_VOP3__V_CVT_F32_UBYTE2::~Inst_VOP3__V_CVT_F32_UBYTE2()
25834 {
25835 } // ~Inst_VOP3__V_CVT_F32_UBYTE2
25836
25837 // D.f = (float)(S0.u[23:16]).
25838 void
25839 Inst_VOP3__V_CVT_F32_UBYTE2::execute(GPUDynInstPtr gpuDynInst)
25840 {
25841 Wavefront *wf = gpuDynInst->wavefront();
25842 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25843 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25844
25845 src.readSrc();
25846
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25854
25855 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25856 if (wf->execMask(lane)) {
25857 vdst[lane] = (VecElemF32)bits(src[lane], 23, 16);
25858 }
25859 }
25860
25861 vdst.write();
25862 }
25863
25864 Inst_VOP3__V_CVT_F32_UBYTE3::Inst_VOP3__V_CVT_F32_UBYTE3(InFmt_VOP3 *iFmt)
25865 : Inst_VOP3(iFmt, "v_cvt_f32_ubyte3", false)
25866 {
25867 setFlag(ALU);
25868 setFlag(F32);
25869 } // Inst_VOP3__V_CVT_F32_UBYTE3
25870
25871 Inst_VOP3__V_CVT_F32_UBYTE3::~Inst_VOP3__V_CVT_F32_UBYTE3()
25872 {
25873 } // ~Inst_VOP3__V_CVT_F32_UBYTE3
25874
25875 // D.f = (float)(S0.u[31:24]).
25876 void
25877 Inst_VOP3__V_CVT_F32_UBYTE3::execute(GPUDynInstPtr gpuDynInst)
25878 {
25879 Wavefront *wf = gpuDynInst->wavefront();
25880 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25881 VecOperandF32 vdst(gpuDynInst, instData.VDST);
25882
25883 src.readSrc();
25884
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25892
25893 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25894 if (wf->execMask(lane)) {
25895 vdst[lane] = (VecElemF32)bits(src[lane], 31, 24);
25896 }
25897 }
25898
25899 vdst.write();
25900 }
25901
25902 Inst_VOP3__V_CVT_U32_F64::Inst_VOP3__V_CVT_U32_F64(InFmt_VOP3 *iFmt)
25903 : Inst_VOP3(iFmt, "v_cvt_u32_f64", false)
25904 {
25905 setFlag(ALU);
25906 setFlag(F64);
25907 } // Inst_VOP3__V_CVT_U32_F64
25908
25909 Inst_VOP3__V_CVT_U32_F64::~Inst_VOP3__V_CVT_U32_F64()
25910 {
25911 } // ~Inst_VOP3__V_CVT_U32_F64
25912
25913 // D.u = (unsigned)S0.d.
25914 // Out-of-range floating point values (including infinity) saturate. NaN
25915 // is converted to 0.
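    // As in V_CVT_U32_F32 above: exp > 32 from std::frexp flags values
    // at or beyond 2^32, and negative finite inputs clamp to 0.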
25916 void
25917 Inst_VOP3__V_CVT_U32_F64::execute(GPUDynInstPtr gpuDynInst)
25918 {
25919 Wavefront *wf = gpuDynInst->wavefront();
25920 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
25921 VecOperandU32 vdst(gpuDynInst, instData.VDST);
25922
25923 src.readSrc();
25924
25925 if (instData.ABS & 0x1) {
25926 src.absModifier();
25927 }
25928
25929 if (extData.NEG & 0x1) {
25930 src.negModifier();
25931 }
25932
25933 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25934 if (wf->execMask(lane)) {
25935 int exp;
                std::frexp(src[lane], &exp);
                if (std::isnan(src[lane])) {
                    vdst[lane] = 0;
                } else if (std::isinf(src[lane])) {
                    if (std::signbit(src[lane])) {
                        vdst[lane] = 0;
                    } else {
                        vdst[lane] = UINT_MAX;
                    }
                } else if (exp > 32) {
                    vdst[lane] = UINT_MAX;
                } else if (src[lane] < 0) {
                    vdst[lane] = 0;
                } else {
25948 vdst[lane] = (VecElemU32)src[lane];
25949 }
25950 }
25951 }
25952
25953 vdst.write();
25954 }
25955
25956 Inst_VOP3__V_CVT_F64_U32::Inst_VOP3__V_CVT_F64_U32(InFmt_VOP3 *iFmt)
25957 : Inst_VOP3(iFmt, "v_cvt_f64_u32", false)
25958 {
25959 setFlag(ALU);
25960 setFlag(F64);
25961 } // Inst_VOP3__V_CVT_F64_U32
25962
25963 Inst_VOP3__V_CVT_F64_U32::~Inst_VOP3__V_CVT_F64_U32()
25964 {
25965 } // ~Inst_VOP3__V_CVT_F64_U32
25966
25967 // D.d = (double)S0.u.
25968 void
25969 Inst_VOP3__V_CVT_F64_U32::execute(GPUDynInstPtr gpuDynInst)
25970 {
25971 Wavefront *wf = gpuDynInst->wavefront();
25972 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
25973 VecOperandF64 vdst(gpuDynInst, instData.VDST);
25974
25975 src.readSrc();
25976
        /**
         * input modifiers are supported by FP operations only
         */
        assert(!(instData.ABS & 0x1));
        assert(!(instData.ABS & 0x2));
        assert(!(instData.ABS & 0x4));
        assert(!(extData.NEG & 0x1));
        assert(!(extData.NEG & 0x2));
        assert(!(extData.NEG & 0x4));
25984
25985 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
25986 if (wf->execMask(lane)) {
25987 vdst[lane] = (VecElemF64)src[lane];
25988 }
25989 }
25990
25991 vdst.write();
25992 }
25993
25994 Inst_VOP3__V_TRUNC_F64::Inst_VOP3__V_TRUNC_F64(InFmt_VOP3 *iFmt)
25995 : Inst_VOP3(iFmt, "v_trunc_f64", false)
25996 {
25997 setFlag(ALU);
25998 setFlag(F64);
25999 } // Inst_VOP3__V_TRUNC_F64
26000
26001 Inst_VOP3__V_TRUNC_F64::~Inst_VOP3__V_TRUNC_F64()
26002 {
26003 } // ~Inst_VOP3__V_TRUNC_F64
26004
26005 // D.d = trunc(S0.d), return integer part of S0.d.
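    // std::trunc rounds toward zero, unlike floor: trunc(-1.7) = -1.0
    // while floor(-1.7) = -2.0 (illustrative).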
26006 void
26007 Inst_VOP3__V_TRUNC_F64::execute(GPUDynInstPtr gpuDynInst)
26008 {
26009 Wavefront *wf = gpuDynInst->wavefront();
26010 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26011 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26012
26013 src.readSrc();
26014
26015 if (instData.ABS & 0x1) {
26016 src.absModifier();
26017 }
26018
26019 if (extData.NEG & 0x1) {
26020 src.negModifier();
26021 }
26022
26023 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26024 if (wf->execMask(lane)) {
26025 vdst[lane] = std::trunc(src[lane]);
26026 }
26027 }
26028
26029 vdst.write();
26030 }
26031
26032 Inst_VOP3__V_CEIL_F64::Inst_VOP3__V_CEIL_F64(InFmt_VOP3 *iFmt)
26033 : Inst_VOP3(iFmt, "v_ceil_f64", false)
26034 {
26035 setFlag(ALU);
26036 setFlag(F64);
26037 } // Inst_VOP3__V_CEIL_F64
26038
26039 Inst_VOP3__V_CEIL_F64::~Inst_VOP3__V_CEIL_F64()
26040 {
26041 } // ~Inst_VOP3__V_CEIL_F64
26042
26043 // D.d = ceil(S0.d);
26044 void
26045 Inst_VOP3__V_CEIL_F64::execute(GPUDynInstPtr gpuDynInst)
26046 {
26047 Wavefront *wf = gpuDynInst->wavefront();
26048 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26049 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26050
26051 src.readSrc();
26052
26053 if (instData.ABS & 0x1) {
26054 src.absModifier();
26055 }
26056
26057 if (extData.NEG & 0x1) {
26058 src.negModifier();
26059 }
26060
26061 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26062 if (wf->execMask(lane)) {
26063 vdst[lane] = std::ceil(src[lane]);
26064 }
26065 }
26066
26067 vdst.write();
26068 }
26069
26070 Inst_VOP3__V_RNDNE_F64::Inst_VOP3__V_RNDNE_F64(InFmt_VOP3 *iFmt)
26071 : Inst_VOP3(iFmt, "v_rndne_f64", false)
26072 {
26073 setFlag(ALU);
26074 setFlag(F64);
26075 } // Inst_VOP3__V_RNDNE_F64
26076
26077 Inst_VOP3__V_RNDNE_F64::~Inst_VOP3__V_RNDNE_F64()
26078 {
26079 } // ~Inst_VOP3__V_RNDNE_F64
26080
26081 // D.d = round_nearest_even(S0.d).
26082 void
26083 Inst_VOP3__V_RNDNE_F64::execute(GPUDynInstPtr gpuDynInst)
26084 {
26085 Wavefront *wf = gpuDynInst->wavefront();
26086 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26087 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26088
26089 src.readSrc();
26090
26091 if (instData.ABS & 0x1) {
26092 src.absModifier();
26093 }
26094
26095 if (extData.NEG & 0x1) {
26096 src.negModifier();
26097 }
26098
26099 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26100 if (wf->execMask(lane)) {
26101 vdst[lane] = roundNearestEven(src[lane]);
26102 }
26103 }
26104
26105 vdst.write();
26106 }
26107
26108 Inst_VOP3__V_FLOOR_F64::Inst_VOP3__V_FLOOR_F64(InFmt_VOP3 *iFmt)
26109 : Inst_VOP3(iFmt, "v_floor_f64", false)
26110 {
26111 setFlag(ALU);
26112 setFlag(F64);
26113 } // Inst_VOP3__V_FLOOR_F64
26114
26115 Inst_VOP3__V_FLOOR_F64::~Inst_VOP3__V_FLOOR_F64()
26116 {
26117 } // ~Inst_VOP3__V_FLOOR_F64
26118
26119 // D.d = floor(S0.d);
26120 void
26121 Inst_VOP3__V_FLOOR_F64::execute(GPUDynInstPtr gpuDynInst)
26122 {
26123 Wavefront *wf = gpuDynInst->wavefront();
26124 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26125 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26126
26127 src.readSrc();
26128
26129 if (instData.ABS & 0x1) {
26130 src.absModifier();
26131 }
26132
26133 if (extData.NEG & 0x1) {
26134 src.negModifier();
26135 }
26136
26137 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26138 if (wf->execMask(lane)) {
26139 vdst[lane] = std::floor(src[lane]);
26140 }
26141 }
26142
26143 vdst.write();
26144 }
26145
26146 Inst_VOP3__V_FRACT_F32::Inst_VOP3__V_FRACT_F32(InFmt_VOP3 *iFmt)
26147 : Inst_VOP3(iFmt, "v_fract_f32", false)
26148 {
26149 setFlag(ALU);
26150 setFlag(F32);
26151 } // Inst_VOP3__V_FRACT_F32
26152
26153 Inst_VOP3__V_FRACT_F32::~Inst_VOP3__V_FRACT_F32()
26154 {
26155 } // ~Inst_VOP3__V_FRACT_F32
26156
26157 // D.f = modf(S0.f).
26158 void
26159 Inst_VOP3__V_FRACT_F32::execute(GPUDynInstPtr gpuDynInst)
26160 {
26161 Wavefront *wf = gpuDynInst->wavefront();
26162 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26163 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26164
26165 src.readSrc();
26166
26167 if (instData.ABS & 0x1) {
26168 src.absModifier();
26169 }
26170
26171 if (extData.NEG & 0x1) {
26172 src.negModifier();
26173 }
26174
26175 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26176 if (wf->execMask(lane)) {
26177 VecElemF32 int_part(0.0);
26178 vdst[lane] = std::modf(src[lane], &int_part);
26179 }
26180 }
26181
26182 vdst.write();
26183 }
26184
26185 Inst_VOP3__V_TRUNC_F32::Inst_VOP3__V_TRUNC_F32(InFmt_VOP3 *iFmt)
26186 : Inst_VOP3(iFmt, "v_trunc_f32", false)
26187 {
26188 setFlag(ALU);
26189 setFlag(F32);
26190 } // Inst_VOP3__V_TRUNC_F32
26191
26192 Inst_VOP3__V_TRUNC_F32::~Inst_VOP3__V_TRUNC_F32()
26193 {
26194 } // ~Inst_VOP3__V_TRUNC_F32
26195
26196 // D.f = trunc(S0.f), return integer part of S0.f.
26197 void
26198 Inst_VOP3__V_TRUNC_F32::execute(GPUDynInstPtr gpuDynInst)
26199 {
26200 Wavefront *wf = gpuDynInst->wavefront();
26201 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26202 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26203
26204 src.readSrc();
26205
26206 if (instData.ABS & 0x1) {
26207 src.absModifier();
26208 }
26209
26210 if (extData.NEG & 0x1) {
26211 src.negModifier();
26212 }
26213
26214 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26215 if (wf->execMask(lane)) {
26216 vdst[lane] = std::trunc(src[lane]);
26217 }
26218 }
26219
26220 vdst.write();
26221 }
26222
26223 Inst_VOP3__V_CEIL_F32::Inst_VOP3__V_CEIL_F32(InFmt_VOP3 *iFmt)
26224 : Inst_VOP3(iFmt, "v_ceil_f32", false)
26225 {
26226 setFlag(ALU);
26227 setFlag(F32);
26228 } // Inst_VOP3__V_CEIL_F32
26229
26230 Inst_VOP3__V_CEIL_F32::~Inst_VOP3__V_CEIL_F32()
26231 {
26232 } // ~Inst_VOP3__V_CEIL_F32
26233
26234 // D.f = ceil(S0.f);
26235 void
26236 Inst_VOP3__V_CEIL_F32::execute(GPUDynInstPtr gpuDynInst)
26237 {
26238 Wavefront *wf = gpuDynInst->wavefront();
26239 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26240 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26241
26242 src.readSrc();
26243
26244 if (instData.ABS & 0x1) {
26245 src.absModifier();
26246 }
26247
26248 if (extData.NEG & 0x1) {
26249 src.negModifier();
26250 }
26251
26252 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26253 if (wf->execMask(lane)) {
26254 vdst[lane] = std::ceil(src[lane]);
26255 }
26256 }
26257
26258 vdst.write();
26259 }
26260
26261 Inst_VOP3__V_RNDNE_F32::Inst_VOP3__V_RNDNE_F32(InFmt_VOP3 *iFmt)
26262 : Inst_VOP3(iFmt, "v_rndne_f32", false)
26263 {
26264 setFlag(ALU);
26265 setFlag(F32);
26266 } // Inst_VOP3__V_RNDNE_F32
26267
26268 Inst_VOP3__V_RNDNE_F32::~Inst_VOP3__V_RNDNE_F32()
26269 {
26270 } // ~Inst_VOP3__V_RNDNE_F32
26271
26272 // D.f = round_nearest_even(S0.f).
26273 void
26274 Inst_VOP3__V_RNDNE_F32::execute(GPUDynInstPtr gpuDynInst)
26275 {
26276 Wavefront *wf = gpuDynInst->wavefront();
26277 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26278 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26279
26280 src.readSrc();
26281
26282 if (instData.ABS & 0x1) {
26283 src.absModifier();
26284 }
26285
26286 if (extData.NEG & 0x1) {
26287 src.negModifier();
26288 }
26289
26290 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26291 if (wf->execMask(lane)) {
26292 vdst[lane] = roundNearestEven(src[lane]);
26293 }
26294 }
26295
26296 vdst.write();
26297 }
26298
26299 Inst_VOP3__V_FLOOR_F32::Inst_VOP3__V_FLOOR_F32(InFmt_VOP3 *iFmt)
26300 : Inst_VOP3(iFmt, "v_floor_f32", false)
26301 {
26302 setFlag(ALU);
26303 setFlag(F32);
26304 } // Inst_VOP3__V_FLOOR_F32
26305
26306 Inst_VOP3__V_FLOOR_F32::~Inst_VOP3__V_FLOOR_F32()
26307 {
26308 } // ~Inst_VOP3__V_FLOOR_F32
26309
26310 // D.f = floor(S0.f);
26311 void
26312 Inst_VOP3__V_FLOOR_F32::execute(GPUDynInstPtr gpuDynInst)
26313 {
26314 Wavefront *wf = gpuDynInst->wavefront();
26315 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26316 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26317
26318 src.readSrc();
26319
26320 if (instData.ABS & 0x1) {
26321 src.absModifier();
26322 }
26323
26324 if (extData.NEG & 0x1) {
26325 src.negModifier();
26326 }
26327
26328 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26329 if (wf->execMask(lane)) {
26330 vdst[lane] = std::floor(src[lane]);
26331 }
26332 }
26333
26334 vdst.write();
26335 }
26336
26337 Inst_VOP3__V_EXP_F32::Inst_VOP3__V_EXP_F32(InFmt_VOP3 *iFmt)
26338 : Inst_VOP3(iFmt, "v_exp_f32", false)
26339 {
26340 setFlag(ALU);
26341 setFlag(F32);
26342 } // Inst_VOP3__V_EXP_F32
26343
26344 Inst_VOP3__V_EXP_F32::~Inst_VOP3__V_EXP_F32()
26345 {
26346 } // ~Inst_VOP3__V_EXP_F32
26347
26348 // D.f = pow(2.0, S0.f).
26349 void
26350 Inst_VOP3__V_EXP_F32::execute(GPUDynInstPtr gpuDynInst)
26351 {
26352 Wavefront *wf = gpuDynInst->wavefront();
26353 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26354 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26355
26356 src.readSrc();
26357
26358 if (instData.ABS & 0x1) {
26359 src.absModifier();
26360 }
26361
26362 if (extData.NEG & 0x1) {
26363 src.negModifier();
26364 }
26365
26366 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26367 if (wf->execMask(lane)) {
26368 vdst[lane] = std::pow(2.0, src[lane]);
26369 }
26370 }
26371
26372 vdst.write();
26373 }
26374
26375 Inst_VOP3__V_LOG_F32::Inst_VOP3__V_LOG_F32(InFmt_VOP3 *iFmt)
26376 : Inst_VOP3(iFmt, "v_log_f32", false)
26377 {
26378 setFlag(ALU);
26379 setFlag(F32);
26380 } // Inst_VOP3__V_LOG_F32
26381
26382 Inst_VOP3__V_LOG_F32::~Inst_VOP3__V_LOG_F32()
26383 {
26384 } // ~Inst_VOP3__V_LOG_F32
26385
26386 // D.f = log2(S0.f).
26387 void
26388 Inst_VOP3__V_LOG_F32::execute(GPUDynInstPtr gpuDynInst)
26389 {
26390 Wavefront *wf = gpuDynInst->wavefront();
26391 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26392 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26393
26394 src.readSrc();
26395
26396 if (instData.ABS & 0x1) {
26397 src.absModifier();
26398 }
26399
26400 if (extData.NEG & 0x1) {
26401 src.negModifier();
26402 }
26403
26404 /**
26405 * input modifiers are supported by FP operations only
26406 */
26407 assert(!(instData.ABS & 0x2));
26408 assert(!(instData.ABS & 0x4));
26409 assert(!(extData.NEG & 0x2));
26410 assert(!(extData.NEG & 0x4));
26411
26412 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26413 if (wf->execMask(lane)) {
26414 vdst[lane] = std::log2(src[lane]);
26415 }
26416 }
26417
26418 vdst.write();
26419 }
26420
26421 Inst_VOP3__V_RCP_F32::Inst_VOP3__V_RCP_F32(InFmt_VOP3 *iFmt)
26422 : Inst_VOP3(iFmt, "v_rcp_f32", false)
26423 {
26424 setFlag(ALU);
26425 setFlag(F32);
26426 } // Inst_VOP3__V_RCP_F32
26427
26428 Inst_VOP3__V_RCP_F32::~Inst_VOP3__V_RCP_F32()
26429 {
26430 } // ~Inst_VOP3__V_RCP_F32
26431
26432 // D.f = 1.0 / S0.f.
26433 void
26434 Inst_VOP3__V_RCP_F32::execute(GPUDynInstPtr gpuDynInst)
26435 {
26436 Wavefront *wf = gpuDynInst->wavefront();
26437 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26438 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26439
26440 src.readSrc();
26441
26442 if (instData.ABS & 0x1) {
26443 src.absModifier();
26444 }
26445
26446 if (extData.NEG & 0x1) {
26447 src.negModifier();
26448 }
26449
26450 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26451 if (wf->execMask(lane)) {
26452 vdst[lane] = 1.0 / src[lane];
26453 }
26454 }
26455
26456 vdst.write();
26457 }
26458
26459 Inst_VOP3__V_RCP_IFLAG_F32::Inst_VOP3__V_RCP_IFLAG_F32(InFmt_VOP3 *iFmt)
26460 : Inst_VOP3(iFmt, "v_rcp_iflag_f32", false)
26461 {
26462 setFlag(ALU);
26463 setFlag(F32);
26464 } // Inst_VOP3__V_RCP_IFLAG_F32
26465
26466 Inst_VOP3__V_RCP_IFLAG_F32::~Inst_VOP3__V_RCP_IFLAG_F32()
26467 {
26468 } // ~Inst_VOP3__V_RCP_IFLAG_F32
26469
26470 // D.f = 1.0 / S0.f.
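// The "iflag" variant is intended for integer-division macros: on
// hardware it reports divide-by-zero through an integer exception flag
// rather than raising floating-point exceptions. That flag is not
// modeled here, so the computation matches V_RCP_F32.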
26471 void
26472 Inst_VOP3__V_RCP_IFLAG_F32::execute(GPUDynInstPtr gpuDynInst)
26473 {
26474 Wavefront *wf = gpuDynInst->wavefront();
26475 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26476 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26477
26478 src.readSrc();
26479
26480 if (instData.ABS & 0x1) {
26481 src.absModifier();
26482 }
26483
26484 if (extData.NEG & 0x1) {
26485 src.negModifier();
26486 }
26487
26488 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26489 if (wf->execMask(lane)) {
26490 vdst[lane] = 1.0 / src[lane];
26491 }
26492 }
26493
26494 vdst.write();
26495 }
26496
26497 Inst_VOP3__V_RSQ_F32::Inst_VOP3__V_RSQ_F32(InFmt_VOP3 *iFmt)
26498 : Inst_VOP3(iFmt, "v_rsq_f32", false)
26499 {
26500 setFlag(ALU);
26501 setFlag(F32);
26502 } // Inst_VOP3__V_RSQ_F32
26503
26504 Inst_VOP3__V_RSQ_F32::~Inst_VOP3__V_RSQ_F32()
26505 {
26506 } // ~Inst_VOP3__V_RSQ_F32
26507
26508 // D.f = 1.0 / sqrt(S0.f).
26509 void
26510 Inst_VOP3__V_RSQ_F32::execute(GPUDynInstPtr gpuDynInst)
26511 {
26512 Wavefront *wf = gpuDynInst->wavefront();
26513 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26514 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26515
26516 src.readSrc();
26517
26518 if (instData.ABS & 0x1) {
26519 src.absModifier();
26520 }
26521
26522 if (extData.NEG & 0x1) {
26523 src.negModifier();
26524 }
26525
26526 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26527 if (wf->execMask(lane)) {
26528 vdst[lane] = 1.0 / std::sqrt(src[lane]);
26529 }
26530 }
26531
26532 vdst.write();
26533 }
26534
26535 Inst_VOP3__V_RCP_F64::Inst_VOP3__V_RCP_F64(InFmt_VOP3 *iFmt)
26536 : Inst_VOP3(iFmt, "v_rcp_f64", false)
26537 {
26538 setFlag(ALU);
26539 setFlag(F64);
26540 } // Inst_VOP3__V_RCP_F64
26541
26542 Inst_VOP3__V_RCP_F64::~Inst_VOP3__V_RCP_F64()
26543 {
26544 } // ~Inst_VOP3__V_RCP_F64
26545
26546 // D.d = 1.0 / S0.d.
26547 void
26548 Inst_VOP3__V_RCP_F64::execute(GPUDynInstPtr gpuDynInst)
26549 {
26550 Wavefront *wf = gpuDynInst->wavefront();
26551 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26552 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26553
26554 src.readSrc();
26555
26556 if (instData.ABS & 0x1) {
26557 src.absModifier();
26558 }
26559
26560 if (extData.NEG & 0x1) {
26561 src.negModifier();
26562 }
26563
26564 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26565 if (wf->execMask(lane)) {
26566 if (std::fpclassify(src[lane]) == FP_ZERO) {
26567 vdst[lane] = +INFINITY;
26568 } else if (std::isnan(src[lane])) {
26569 vdst[lane] = NAN;
26570 } else if (std::isinf(src[lane])) {
26571 if (std::signbit(src[lane])) {
26572 vdst[lane] = -0.0;
26573 } else {
26574 vdst[lane] = 0.0;
26575 }
26576 } else {
26577 vdst[lane] = 1.0 / src[lane];
26578 }
26579 }
26580 }
26581
26582 vdst.write();
26583 }
26584
26585 Inst_VOP3__V_RSQ_F64::Inst_VOP3__V_RSQ_F64(InFmt_VOP3 *iFmt)
26586 : Inst_VOP3(iFmt, "v_rsq_f64", false)
26587 {
26588 setFlag(ALU);
26589 setFlag(F64);
26590 } // Inst_VOP3__V_RSQ_F64
26591
26592 Inst_VOP3__V_RSQ_F64::~Inst_VOP3__V_RSQ_F64()
26593 {
26594 } // ~Inst_VOP3__V_RSQ_F64
26595
26596 // D.d = 1.0 / sqrt(S0.d).
26597 void
26598 Inst_VOP3__V_RSQ_F64::execute(GPUDynInstPtr gpuDynInst)
26599 {
26600 Wavefront *wf = gpuDynInst->wavefront();
26601 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26602 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26603
26604 src.readSrc();
26605
26606 if (instData.ABS & 0x1) {
26607 src.absModifier();
26608 }
26609
26610 if (extData.NEG & 0x1) {
26611 src.negModifier();
26612 }
26613
26614 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26615 if (wf->execMask(lane)) {
26616 if (std::fpclassify(src[lane]) == FP_ZERO) {
26617 vdst[lane] = +INFINITY;
26618 } else if (std::isnan(src[lane])) {
26619 vdst[lane] = NAN;
26620 } else if (std::isinf(src[lane]) && !std::signbit(src[lane])) {
26621 vdst[lane] = 0.0;
26622 } else if (std::signbit(src[lane])) {
26623 vdst[lane] = NAN;
26624 } else {
26625 vdst[lane] = 1.0 / std::sqrt(src[lane]);
26626 }
26627 }
26628 }
26629
26630 vdst.write();
26631 }
26632
26633 Inst_VOP3__V_SQRT_F32::Inst_VOP3__V_SQRT_F32(InFmt_VOP3 *iFmt)
26634 : Inst_VOP3(iFmt, "v_sqrt_f32", false)
26635 {
26636 setFlag(ALU);
26637 setFlag(F32);
26638 } // Inst_VOP3__V_SQRT_F32
26639
26640 Inst_VOP3__V_SQRT_F32::~Inst_VOP3__V_SQRT_F32()
26641 {
26642 } // ~Inst_VOP3__V_SQRT_F32
26643
26644 // D.f = sqrt(S0.f).
26645 void
26646 Inst_VOP3__V_SQRT_F32::execute(GPUDynInstPtr gpuDynInst)
26647 {
26648 Wavefront *wf = gpuDynInst->wavefront();
26649 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26650 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26651
26652 src.readSrc();
26653
26654 if (instData.ABS & 0x1) {
26655 src.absModifier();
26656 }
26657
26658 if (extData.NEG & 0x1) {
26659 src.negModifier();
26660 }
26661
26662 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26663 if (wf->execMask(lane)) {
26664 vdst[lane] = std::sqrt(src[lane]);
26665 }
26666 }
26667
26668 vdst.write();
26669 }
26670
26671 Inst_VOP3__V_SQRT_F64::Inst_VOP3__V_SQRT_F64(InFmt_VOP3 *iFmt)
26672 : Inst_VOP3(iFmt, "v_sqrt_f64", false)
26673 {
26674 setFlag(ALU);
26675 setFlag(F64);
26676 } // Inst_VOP3__V_SQRT_F64
26677
26678 Inst_VOP3__V_SQRT_F64::~Inst_VOP3__V_SQRT_F64()
26679 {
26680 } // ~Inst_VOP3__V_SQRT_F64
26681
26682 // D.d = sqrt(S0.d).
26683 void
26684 Inst_VOP3__V_SQRT_F64::execute(GPUDynInstPtr gpuDynInst)
26685 {
26686 Wavefront *wf = gpuDynInst->wavefront();
26687 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26688 VecOperandF64 vdst(gpuDynInst, instData.VDST);
26689
26690 src.readSrc();
26691
26692 if (instData.ABS & 0x1) {
26693 src.absModifier();
26694 }
26695
26696 if (extData.NEG & 0x1) {
26697 src.negModifier();
26698 }
26699
26700 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26701 if (wf->execMask(lane)) {
26702 vdst[lane] = std::sqrt(src[lane]);
26703 }
26704 }
26705
26706 vdst.write();
26707 }
26708
26709 Inst_VOP3__V_SIN_F32::Inst_VOP3__V_SIN_F32(InFmt_VOP3 *iFmt)
26710 : Inst_VOP3(iFmt, "v_sin_f32", false)
26711 {
26712 setFlag(ALU);
26713 setFlag(F32);
26714 } // Inst_VOP3__V_SIN_F32
26715
26716 Inst_VOP3__V_SIN_F32::~Inst_VOP3__V_SIN_F32()
26717 {
26718 } // ~Inst_VOP3__V_SIN_F32
26719
26720 // D.f = sin(S0.f * 2 * PI).
26721 void
26722 Inst_VOP3__V_SIN_F32::execute(GPUDynInstPtr gpuDynInst)
26723 {
26724 Wavefront *wf = gpuDynInst->wavefront();
26725 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26726 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
26727 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26728
26729 src.readSrc();
26730 pi.read();
26731
26732 if (instData.ABS & 0x1) {
26733 src.absModifier();
26734 }
26735
26736 if (extData.NEG & 0x1) {
26737 src.negModifier();
26738 }
26739
26740 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26741 if (wf->execMask(lane)) {
26742 vdst[lane] = std::sin(src[lane] * 2 * pi.rawData());
26743 }
26744 }
26745
26746 vdst.write();
26747 }
26748
26749 Inst_VOP3__V_COS_F32::Inst_VOP3__V_COS_F32(InFmt_VOP3 *iFmt)
26750 : Inst_VOP3(iFmt, "v_cos_f32", false)
26751 {
26752 setFlag(ALU);
26753 setFlag(F32);
26754 } // Inst_VOP3__V_COS_F32
26755
26756 Inst_VOP3__V_COS_F32::~Inst_VOP3__V_COS_F32()
26757 {
26758 } // ~Inst_VOP3__V_COS_F32
26759
26760 // D.f = cos(S0.f * 2 * PI).
26761 void
26762 Inst_VOP3__V_COS_F32::execute(GPUDynInstPtr gpuDynInst)
26763 {
26764 Wavefront *wf = gpuDynInst->wavefront();
26765 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
26766 ConstScalarOperandF32 pi(gpuDynInst, REG_PI);
26767 VecOperandF32 vdst(gpuDynInst, instData.VDST);
26768
26769 src.readSrc();
26770 pi.read();
26771
26772 if (instData.ABS & 0x1) {
26773 src.absModifier();
26774 }
26775
26776 if (extData.NEG & 0x1) {
26777 src.negModifier();
26778 }
26779
26780 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26781 if (wf->execMask(lane)) {
26782 vdst[lane] = std::cos(src[lane] * 2 * pi.rawData());
26783 }
26784 }
26785
26786 vdst.write();
26787 }
26788
26789 Inst_VOP3__V_NOT_B32::Inst_VOP3__V_NOT_B32(InFmt_VOP3 *iFmt)
26790 : Inst_VOP3(iFmt, "v_not_b32", false)
26791 {
26792 setFlag(ALU);
26793 } // Inst_VOP3__V_NOT_B32
26794
26795 Inst_VOP3__V_NOT_B32::~Inst_VOP3__V_NOT_B32()
26796 {
26797 } // ~Inst_VOP3__V_NOT_B32
26798
26799 // D.u = ~S0.u.
26800 // Input and output modifiers not supported.
26801 void
26802 Inst_VOP3__V_NOT_B32::execute(GPUDynInstPtr gpuDynInst)
26803 {
26804 Wavefront *wf = gpuDynInst->wavefront();
26805 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26806 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26807
26808 src.readSrc();
26809
26810 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26811 if (wf->execMask(lane)) {
26812 vdst[lane] = ~src[lane];
26813 }
26814 }
26815
26816 vdst.write();
26817 }
26818
26819 Inst_VOP3__V_BFREV_B32::Inst_VOP3__V_BFREV_B32(InFmt_VOP3 *iFmt)
26820 : Inst_VOP3(iFmt, "v_bfrev_b32", false)
26821 {
26822 setFlag(ALU);
26823 } // Inst_VOP3__V_BFREV_B32
26824
26825 Inst_VOP3__V_BFREV_B32::~Inst_VOP3__V_BFREV_B32()
26826 {
26827 } // ~Inst_VOP3__V_BFREV_B32
26828
26829 // D.u[31:0] = S0.u[0:31], bitfield reverse.
26830 // Input and output modifiers not supported.
26831 void
26832 Inst_VOP3__V_BFREV_B32::execute(GPUDynInstPtr gpuDynInst)
26833 {
26834 Wavefront *wf = gpuDynInst->wavefront();
26835 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26836 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26837
26838 src.readSrc();
26839
26840 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26841 if (wf->execMask(lane)) {
26842 vdst[lane] = reverseBits(src[lane]);
26843 }
26844 }
26845
26846 vdst.write();
26847 }
26848
26849 Inst_VOP3__V_FFBH_U32::Inst_VOP3__V_FFBH_U32(InFmt_VOP3 *iFmt)
26850 : Inst_VOP3(iFmt, "v_ffbh_u32", false)
26851 {
26852 setFlag(ALU);
26853 } // Inst_VOP3__V_FFBH_U32
26854
26855 Inst_VOP3__V_FFBH_U32::~Inst_VOP3__V_FFBH_U32()
26856 {
26857 } // ~Inst_VOP3__V_FFBH_U32
26858
26859 // D.u = position of first 1 in S0.u from MSB;
26860 // D.u = 0xffffffff if S0.u == 0.
26861 void
26862 Inst_VOP3__V_FFBH_U32::execute(GPUDynInstPtr gpuDynInst)
26863 {
26864 Wavefront *wf = gpuDynInst->wavefront();
26865 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26866 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26867
26868 src.readSrc();
26869
26870 if (instData.ABS & 0x1) {
26871 src.absModifier();
26872 }
26873
26874 if (extData.NEG & 0x1) {
26875 src.negModifier();
26876 }
26877
26878 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26879 if (wf->execMask(lane)) {
26880 vdst[lane] = findFirstOneMsb(src[lane]);
26881 }
26882 }
26883
26884 vdst.write();
26885 }
26886
26887 Inst_VOP3__V_FFBL_B32::Inst_VOP3__V_FFBL_B32(InFmt_VOP3 *iFmt)
26888 : Inst_VOP3(iFmt, "v_ffbl_b32", false)
26889 {
26890 setFlag(ALU);
26891 } // Inst_VOP3__V_FFBL_B32
26892
26893 Inst_VOP3__V_FFBL_B32::~Inst_VOP3__V_FFBL_B32()
26894 {
26895 } // ~Inst_VOP3__V_FFBL_B32
26896
26897 // D.u = position of first 1 in S0.u from LSB;
26898 // D.u = 0xffffffff if S0.u == 0.
26899 void
26900 Inst_VOP3__V_FFBL_B32::execute(GPUDynInstPtr gpuDynInst)
26901 {
26902 Wavefront *wf = gpuDynInst->wavefront();
26903 ConstVecOperandU32 src(gpuDynInst, extData.SRC0);
26904 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26905
26906 src.readSrc();
26907
26908 if (instData.ABS & 0x1) {
26909 src.absModifier();
26910 }
26911
26912 if (extData.NEG & 0x1) {
26913 src.negModifier();
26914 }
26915
26916 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26917 if (wf->execMask(lane)) {
26918 vdst[lane] = findFirstOne(src[lane]);
26919 }
26920 }
26921
26922 vdst.write();
26923 }
26924
26925 Inst_VOP3__V_FFBH_I32::Inst_VOP3__V_FFBH_I32(InFmt_VOP3 *iFmt)
26926 : Inst_VOP3(iFmt, "v_ffbh_i32", false)
26927 {
26928 setFlag(ALU);
26929 } // Inst_VOP3__V_FFBH_I32
26930
26931 Inst_VOP3__V_FFBH_I32::~Inst_VOP3__V_FFBH_I32()
26932 {
26933 } // ~Inst_VOP3__V_FFBH_I32
26934
26935 // D.u = position of first bit different from sign bit in S0.i from MSB;
26936 // D.u = 0xffffffff if S0.i == 0 or S0.i == 0xffffffff.
26937 void
26938 Inst_VOP3__V_FFBH_I32::execute(GPUDynInstPtr gpuDynInst)
26939 {
26940 Wavefront *wf = gpuDynInst->wavefront();
26941 ConstVecOperandI32 src(gpuDynInst, extData.SRC0);
26942 VecOperandU32 vdst(gpuDynInst, instData.VDST);
26943
26944 src.readSrc();
26945
26946 if (instData.ABS & 0x1) {
26947 src.absModifier();
26948 }
26949
26950 if (extData.NEG & 0x1) {
26951 src.negModifier();
26952 }
26953
26954 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26955 if (wf->execMask(lane)) {
26956 vdst[lane] = firstOppositeSignBit(src[lane]);
26957 }
26958 }
26959
26960 vdst.write();
26961 }
26962
26963 Inst_VOP3__V_FREXP_EXP_I32_F64::Inst_VOP3__V_FREXP_EXP_I32_F64(
26964 InFmt_VOP3 *iFmt)
26965 : Inst_VOP3(iFmt, "v_frexp_exp_i32_f64", false)
26966 {
26967 setFlag(ALU);
26968 setFlag(F64);
26969 } // Inst_VOP3__V_FREXP_EXP_I32_F64
26970
26971 Inst_VOP3__V_FREXP_EXP_I32_F64::~Inst_VOP3__V_FREXP_EXP_I32_F64()
26972 {
26973 } // ~Inst_VOP3__V_FREXP_EXP_I32_F64
26974
26975 // See V_FREXP_EXP_I32_F32.
26976 void
26977 Inst_VOP3__V_FREXP_EXP_I32_F64::execute(GPUDynInstPtr gpuDynInst)
26978 {
26979 Wavefront *wf = gpuDynInst->wavefront();
26980 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
26981 VecOperandI32 vdst(gpuDynInst, instData.VDST);
26982
26983 src.readSrc();
26984
26985 if (instData.ABS & 0x1) {
26986 src.absModifier();
26987 }
26988
26989 if (extData.NEG & 0x1) {
26990 src.negModifier();
26991 }
26992
26993 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
26994 if (wf->execMask(lane)) {
26995 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
26996 vdst[lane] = 0;
26997 } else {
26998 VecElemI32 exp(0);
26999 std::frexp(src[lane], &exp);
27000 vdst[lane] = exp;
27001 }
27002 }
27003 }
27004
27005 vdst.write();
27006 }
27007
27008 Inst_VOP3__V_FREXP_MANT_F64::Inst_VOP3__V_FREXP_MANT_F64(InFmt_VOP3 *iFmt)
27009 : Inst_VOP3(iFmt, "v_frexp_mant_f64", false)
27010 {
27011 setFlag(ALU);
27012 setFlag(F64);
27013 } // Inst_VOP3__V_FREXP_MANT_F64
27014
27015 Inst_VOP3__V_FREXP_MANT_F64::~Inst_VOP3__V_FREXP_MANT_F64()
27016 {
27017 } // ~Inst_VOP3__V_FREXP_MANT_F64
27018
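// See V_FREXP_MANT_F32.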
27019 void
27020 Inst_VOP3__V_FREXP_MANT_F64::execute(GPUDynInstPtr gpuDynInst)
27021 {
27022 Wavefront *wf = gpuDynInst->wavefront();
27023 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
27024 VecOperandF64 vdst(gpuDynInst, instData.VDST);
27025
27026 src.readSrc();
27027
27028 if (instData.ABS & 0x1) {
27029 src.absModifier();
27030 }
27031
27032 if (extData.NEG & 0x1) {
27033 src.negModifier();
27034 }
27035
27036 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27037 if (wf->execMask(lane)) {
27038 VecElemI32 exp(0);
27039 vdst[lane] = std::frexp(src[lane], &exp);
27040 }
27041 }
27042
27043 vdst.write();
27044 }
27045
27046 Inst_VOP3__V_FRACT_F64::Inst_VOP3__V_FRACT_F64(InFmt_VOP3 *iFmt)
27047 : Inst_VOP3(iFmt, "v_fract_f64", false)
27048 {
27049 setFlag(ALU);
27050 setFlag(F64);
27051 } // Inst_VOP3__V_FRACT_F64
27052
27053 Inst_VOP3__V_FRACT_F64::~Inst_VOP3__V_FRACT_F64()
27054 {
27055 } // ~Inst_VOP3__V_FRACT_F64
27056
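// D.d = modf(S0.d). See V_FRACT_F32.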
27057 void
27058 Inst_VOP3__V_FRACT_F64::execute(GPUDynInstPtr gpuDynInst)
27059 {
27060 Wavefront *wf = gpuDynInst->wavefront();
27061 ConstVecOperandF64 src(gpuDynInst, extData.SRC0);
27062 VecOperandF64 vdst(gpuDynInst, instData.VDST);
27063
27064 src.readSrc();
27065
27066 if (instData.ABS & 0x1) {
27067 src.absModifier();
27068 }
27069
27070 if (extData.NEG & 0x1) {
27071 src.negModifier();
27072 }
27073
27074 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27075 if (wf->execMask(lane)) {
27076 VecElemF64 int_part(0.0);
27077 vdst[lane] = std::modf(src[lane], &int_part);
27078 }
27079 }
27080
27081 vdst.write();
27082 }
27083
27084 Inst_VOP3__V_FREXP_EXP_I32_F32::Inst_VOP3__V_FREXP_EXP_I32_F32(
27085 InFmt_VOP3 *iFmt)
27086 : Inst_VOP3(iFmt, "v_frexp_exp_i32_f32", false)
27087 {
27088 setFlag(ALU);
27089 setFlag(F32);
27090 } // Inst_VOP3__V_FREXP_EXP_I32_F32
27091
27092 Inst_VOP3__V_FREXP_EXP_I32_F32::~Inst_VOP3__V_FREXP_EXP_I32_F32()
27093 {
27094 } // ~Inst_VOP3__V_FREXP_EXP_I32_F32
27095
27096 // frexp(S0.f, Exponent(S0.f))
27097 // if (S0.f == INF || S0.f == NAN) then D.i = 0;
27098 // else D.i = Exponent(S0.f)
27099 void
27100 Inst_VOP3__V_FREXP_EXP_I32_F32::execute(GPUDynInstPtr gpuDynInst)
27101 {
27102 Wavefront *wf = gpuDynInst->wavefront();
27103 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27104 VecOperandI32 vdst(gpuDynInst, instData.VDST);
27105
27106 src.readSrc();
27107
27108 if (instData.ABS & 0x1) {
27109 src.absModifier();
27110 }
27111
27112 if (extData.NEG & 0x1) {
27113 src.negModifier();
27114 }
27115
27116 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27117 if (wf->execMask(lane)) {
27118 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
27119 vdst[lane] = 0;
27120 } else {
27121 VecElemI32 exp(0);
27122 std::frexp(src[lane], &exp);
27123 vdst[lane] = exp;
27124 }
27125 }
27126 }
27127
27128 vdst.write();
27129 }
27130
27131 Inst_VOP3__V_FREXP_MANT_F32::Inst_VOP3__V_FREXP_MANT_F32(InFmt_VOP3 *iFmt)
27132 : Inst_VOP3(iFmt, "v_frexp_mant_f32", false)
27133 {
27134 setFlag(ALU);
27135 setFlag(F32);
27136 } // Inst_VOP3__V_FREXP_MANT_F32
27137
27138 Inst_VOP3__V_FREXP_MANT_F32::~Inst_VOP3__V_FREXP_MANT_F32()
27139 {
27140 } // ~Inst_VOP3__V_FREXP_MANT_F32
27141
27142 // if (S0.f == INF || S0.f == NAN) then D.f = S0.f;
27143 // else D.f = Mantissa(S0.f).
27144 void
27145 Inst_VOP3__V_FREXP_MANT_F32::execute(GPUDynInstPtr gpuDynInst)
27146 {
27147 Wavefront *wf = gpuDynInst->wavefront();
27148 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27149 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27150
27151 src.readSrc();
27152
27153 if (instData.ABS & 0x1) {
27154 src.absModifier();
27155 }
27156
27157 if (extData.NEG & 0x1) {
27158 src.negModifier();
27159 }
27160
27161 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27162 if (wf->execMask(lane)) {
27163 if (std::isinf(src[lane]) || std::isnan(src[lane])) {
27164 vdst[lane] = src[lane];
27165 } else {
27166 VecElemI32 exp(0);
27167 vdst[lane] = std::frexp(src[lane], &exp);
27168 }
27169 }
27170 }
27171
27172 vdst.write();
27173 }
27174
27175 Inst_VOP3__V_CLREXCP::Inst_VOP3__V_CLREXCP(InFmt_VOP3 *iFmt)
27176 : Inst_VOP3(iFmt, "v_clrexcp", false)
27177 {
27178 } // Inst_VOP3__V_CLREXCP
27179
27180 Inst_VOP3__V_CLREXCP::~Inst_VOP3__V_CLREXCP()
27181 {
27182 } // ~Inst_VOP3__V_CLREXCP
27183
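// Clear wave's exception state in SIMD (SP).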
27184 void
27185 Inst_VOP3__V_CLREXCP::execute(GPUDynInstPtr gpuDynInst)
27186 {
27187 panicUnimplemented();
27188 }
27189
27190 Inst_VOP3__V_CVT_F16_U16::Inst_VOP3__V_CVT_F16_U16(InFmt_VOP3 *iFmt)
27191 : Inst_VOP3(iFmt, "v_cvt_f16_u16", false)
27192 {
27193 setFlag(ALU);
27194 setFlag(F16);
27195 } // Inst_VOP3__V_CVT_F16_U16
27196
27197 Inst_VOP3__V_CVT_F16_U16::~Inst_VOP3__V_CVT_F16_U16()
27198 {
27199 } // ~Inst_VOP3__V_CVT_F16_U16
27200
27201 // D.f16 = uint16_to_flt16(S.u16).
27202 void
27203 Inst_VOP3__V_CVT_F16_U16::execute(GPUDynInstPtr gpuDynInst)
27204 {
27205 panicUnimplemented();
27206 }
27207
27208 Inst_VOP3__V_CVT_F16_I16::Inst_VOP3__V_CVT_F16_I16(InFmt_VOP3 *iFmt)
27209 : Inst_VOP3(iFmt, "v_cvt_f16_i16", false)
27210 {
27211 setFlag(ALU);
27212 setFlag(F16);
27213 } // Inst_VOP3__V_CVT_F16_I16
27214
27215 Inst_VOP3__V_CVT_F16_I16::~Inst_VOP3__V_CVT_F16_I16()
27216 {
27217 } // ~Inst_VOP3__V_CVT_F16_I16
27218
27219 // D.f16 = int16_to_flt16(S.i16).
27220 void
27221 Inst_VOP3__V_CVT_F16_I16::execute(GPUDynInstPtr gpuDynInst)
27222 {
27223 panicUnimplemented();
27224 }
27225
27226 Inst_VOP3__V_CVT_U16_F16::Inst_VOP3__V_CVT_U16_F16(InFmt_VOP3 *iFmt)
27227 : Inst_VOP3(iFmt, "v_cvt_u16_f16", false)
27228 {
27229 setFlag(ALU);
27230 setFlag(F16);
27231 } // Inst_VOP3__V_CVT_U16_F16
27232
27233 Inst_VOP3__V_CVT_U16_F16::~Inst_VOP3__V_CVT_U16_F16()
27234 {
27235 } // ~Inst_VOP3__V_CVT_U16_F16
27236
27237 // D.u16 = flt16_to_uint16(S.f16).
27238 void
27239 Inst_VOP3__V_CVT_U16_F16::execute(GPUDynInstPtr gpuDynInst)
27240 {
27241 panicUnimplemented();
27242 }
27243
27244 Inst_VOP3__V_CVT_I16_F16::Inst_VOP3__V_CVT_I16_F16(InFmt_VOP3 *iFmt)
27245 : Inst_VOP3(iFmt, "v_cvt_i16_f16", false)
27246 {
27247 setFlag(ALU);
27248 setFlag(F16);
27249 } // Inst_VOP3__V_CVT_I16_F16
27250
27251 Inst_VOP3__V_CVT_I16_F16::~Inst_VOP3__V_CVT_I16_F16()
27252 {
27253 } // ~Inst_VOP3__V_CVT_I16_F16
27254
27255 // D.i16 = flt16_to_int16(S.f16).
27256 void
27257 Inst_VOP3__V_CVT_I16_F16::execute(GPUDynInstPtr gpuDynInst)
27258 {
27259 panicUnimplemented();
27260 }
27261
27262 Inst_VOP3__V_RCP_F16::Inst_VOP3__V_RCP_F16(InFmt_VOP3 *iFmt)
27263 : Inst_VOP3(iFmt, "v_rcp_f16", false)
27264 {
27265 setFlag(ALU);
27266 setFlag(F16);
27267 } // Inst_VOP3__V_RCP_F16
27268
27269 Inst_VOP3__V_RCP_F16::~Inst_VOP3__V_RCP_F16()
27270 {
27271 } // ~Inst_VOP3__V_RCP_F16
27272
27273 // if (S0.f16 == 1.0f)
27274 // D.f16 = 1.0f;
27275 // else
27276 // D.f16 = 1 / S0.f16.
27277 void
27278 Inst_VOP3__V_RCP_F16::execute(GPUDynInstPtr gpuDynInst)
27279 {
27280 panicUnimplemented();
27281 }
27282
27283 Inst_VOP3__V_SQRT_F16::Inst_VOP3__V_SQRT_F16(InFmt_VOP3 *iFmt)
27284 : Inst_VOP3(iFmt, "v_sqrt_f16", false)
27285 {
27286 setFlag(ALU);
27287 setFlag(F16);
27288 } // Inst_VOP3__V_SQRT_F16
27289
27290 Inst_VOP3__V_SQRT_F16::~Inst_VOP3__V_SQRT_F16()
27291 {
27292 } // ~Inst_VOP3__V_SQRT_F16
27293
27294 // if (S0.f16 == 1.0f)
27295 // D.f16 = 1.0f;
27296 // else
27297 // D.f16 = sqrt(S0.f16).
27298 void
27299 Inst_VOP3__V_SQRT_F16::execute(GPUDynInstPtr gpuDynInst)
27300 {
27301 panicUnimplemented();
27302 }
27303
27304 Inst_VOP3__V_RSQ_F16::Inst_VOP3__V_RSQ_F16(InFmt_VOP3 *iFmt)
27305 : Inst_VOP3(iFmt, "v_rsq_f16", false)
27306 {
27307 setFlag(ALU);
27308 setFlag(F16);
27309 } // Inst_VOP3__V_RSQ_F16
27310
27311 Inst_VOP3__V_RSQ_F16::~Inst_VOP3__V_RSQ_F16()
27312 {
27313 } // ~Inst_VOP3__V_RSQ_F16
27314
27315 // if (S0.f16 == 1.0f)
27316 // D.f16 = 1.0f;
27317 // else
27318 // D.f16 = 1 / sqrt(S0.f16).
27319 void
27320 Inst_VOP3__V_RSQ_F16::execute(GPUDynInstPtr gpuDynInst)
27321 {
27322 panicUnimplemented();
27323 }
27324
27325 Inst_VOP3__V_LOG_F16::Inst_VOP3__V_LOG_F16(InFmt_VOP3 *iFmt)
27326 : Inst_VOP3(iFmt, "v_log_f16", false)
27327 {
27328 setFlag(ALU);
27329 setFlag(F16);
27330 } // Inst_VOP3__V_LOG_F16
27331
27332 Inst_VOP3__V_LOG_F16::~Inst_VOP3__V_LOG_F16()
27333 {
27334 } // ~Inst_VOP3__V_LOG_F16
27335
27336 // if (S0.f16 == 1.0f)
27337 // D.f16 = 0.0f;
27338 // else
27339 // D.f16 = log2(S0.f16).
27340 void
27341 Inst_VOP3__V_LOG_F16::execute(GPUDynInstPtr gpuDynInst)
27342 {
27343 panicUnimplemented();
27344 }
27345
27346 Inst_VOP3__V_EXP_F16::Inst_VOP3__V_EXP_F16(InFmt_VOP3 *iFmt)
27347 : Inst_VOP3(iFmt, "v_exp_f16", false)
27348 {
27349 setFlag(ALU);
27350 setFlag(F16);
27351 } // Inst_VOP3__V_EXP_F16
27352
27353 Inst_VOP3__V_EXP_F16::~Inst_VOP3__V_EXP_F16()
27354 {
27355 } // ~Inst_VOP3__V_EXP_F16
27356
27357 // if (S0.f16 == 0.0f)
27358 // D.f16 = 1.0f;
27359 // else
27360 // D.f16 = pow(2.0, S0.f16).
27361 void
27362 Inst_VOP3__V_EXP_F16::execute(GPUDynInstPtr gpuDynInst)
27363 {
27364 panicUnimplemented();
27365 }
27366
27367 Inst_VOP3__V_FREXP_MANT_F16::Inst_VOP3__V_FREXP_MANT_F16(InFmt_VOP3 *iFmt)
27368 : Inst_VOP3(iFmt, "v_frexp_mant_f16", false)
27369 {
27370 setFlag(ALU);
27371 setFlag(F16);
27372 } // Inst_VOP3__V_FREXP_MANT_F16
27373
27374 Inst_VOP3__V_FREXP_MANT_F16::~Inst_VOP3__V_FREXP_MANT_F16()
27375 {
27376 } // ~Inst_VOP3__V_FREXP_MANT_F16
27377
27378 // if (S0.f16 == +-INF || S0.f16 == NAN)
27379 // D.f16 = S0.f16;
27380 // else
27381 // D.f16 = mantissa(S0.f16).
27382 void
27383 Inst_VOP3__V_FREXP_MANT_F16::execute(GPUDynInstPtr gpuDynInst)
27384 {
27385 panicUnimplemented();
27386 }
27387
27388 Inst_VOP3__V_FREXP_EXP_I16_F16::Inst_VOP3__V_FREXP_EXP_I16_F16(
27389 InFmt_VOP3 *iFmt)
27390 : Inst_VOP3(iFmt, "v_frexp_exp_i16_f16", false)
27391 {
27392 setFlag(ALU);
27393 setFlag(F16);
27394 } // Inst_VOP3__V_FREXP_EXP_I16_F16
27395
27396 Inst_VOP3__V_FREXP_EXP_I16_F16::~Inst_VOP3__V_FREXP_EXP_I16_F16()
27397 {
27398 } // ~Inst_VOP3__V_FREXP_EXP_I16_F16
27399
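// See V_FREXP_EXP_I32_F32.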
27400 void
27401 Inst_VOP3__V_FREXP_EXP_I16_F16::execute(GPUDynInstPtr gpuDynInst)
27402 {
27403 panicUnimplemented();
27404 }
27405
27406 Inst_VOP3__V_FLOOR_F16::Inst_VOP3__V_FLOOR_F16(InFmt_VOP3 *iFmt)
27407 : Inst_VOP3(iFmt, "v_floor_f16", false)
27408 {
27409 setFlag(ALU);
27410 setFlag(F16);
27411 } // Inst_VOP3__V_FLOOR_F16
27412
27413 Inst_VOP3__V_FLOOR_F16::~Inst_VOP3__V_FLOOR_F16()
27414 {
27415 } // ~Inst_VOP3__V_FLOOR_F16
27416
27417 // D.f16 = floor(S0.f16);
27418 void
27419 Inst_VOP3__V_FLOOR_F16::execute(GPUDynInstPtr gpuDynInst)
27420 {
27421 panicUnimplemented();
27422 }
27423
27424 Inst_VOP3__V_CEIL_F16::Inst_VOP3__V_CEIL_F16(InFmt_VOP3 *iFmt)
27425 : Inst_VOP3(iFmt, "v_ceil_f16", false)
27426 {
27427 setFlag(ALU);
27428 setFlag(F16);
27429 } // Inst_VOP3__V_CEIL_F16
27430
27431 Inst_VOP3__V_CEIL_F16::~Inst_VOP3__V_CEIL_F16()
27432 {
27433 } // ~Inst_VOP3__V_CEIL_F16
27434
27435 // D.f16 = ceil(S0.f16);
27436 void
27437 Inst_VOP3__V_CEIL_F16::execute(GPUDynInstPtr gpuDynInst)
27438 {
27439 panicUnimplemented();
27440 }
27441
27442 Inst_VOP3__V_TRUNC_F16::Inst_VOP3__V_TRUNC_F16(InFmt_VOP3 *iFmt)
27443 : Inst_VOP3(iFmt, "v_trunc_f16", false)
27444 {
27445 setFlag(ALU);
27446 setFlag(F16);
27447 } // Inst_VOP3__V_TRUNC_F16
27448
27449 Inst_VOP3__V_TRUNC_F16::~Inst_VOP3__V_TRUNC_F16()
27450 {
27451 } // ~Inst_VOP3__V_TRUNC_F16
27452
27453 // D.f16 = trunc(S0.f16).
27454 void
27455 Inst_VOP3__V_TRUNC_F16::execute(GPUDynInstPtr gpuDynInst)
27456 {
27457 panicUnimplemented();
27458 }
27459
27460 Inst_VOP3__V_RNDNE_F16::Inst_VOP3__V_RNDNE_F16(InFmt_VOP3 *iFmt)
27461 : Inst_VOP3(iFmt, "v_rndne_f16", false)
27462 {
27463 setFlag(ALU);
27464 setFlag(F16);
27465 } // Inst_VOP3__V_RNDNE_F16
27466
27467 Inst_VOP3__V_RNDNE_F16::~Inst_VOP3__V_RNDNE_F16()
27468 {
27469 } // ~Inst_VOP3__V_RNDNE_F16
27470
27471 // D.f16 = roundNearestEven(S0.f16);
27472 void
27473 Inst_VOP3__V_RNDNE_F16::execute(GPUDynInstPtr gpuDynInst)
27474 {
27475 panicUnimplemented();
27476 }
27477
27478 Inst_VOP3__V_FRACT_F16::Inst_VOP3__V_FRACT_F16(InFmt_VOP3 *iFmt)
27479 : Inst_VOP3(iFmt, "v_fract_f16", false)
27480 {
27481 setFlag(ALU);
27482 setFlag(F16);
27483 } // Inst_VOP3__V_FRACT_F16
27484
27485 Inst_VOP3__V_FRACT_F16::~Inst_VOP3__V_FRACT_F16()
27486 {
27487 } // ~Inst_VOP3__V_FRACT_F16
27488
27489 // D.f16 = S0.f16 + -floor(S0.f16).
27490 void
27491 Inst_VOP3__V_FRACT_F16::execute(GPUDynInstPtr gpuDynInst)
27492 {
27493 panicUnimplemented();
27494 }
27495
27496 Inst_VOP3__V_SIN_F16::Inst_VOP3__V_SIN_F16(InFmt_VOP3 *iFmt)
27497 : Inst_VOP3(iFmt, "v_sin_f16", false)
27498 {
27499 setFlag(ALU);
27500 setFlag(F16);
27501 } // Inst_VOP3__V_SIN_F16
27502
27503 Inst_VOP3__V_SIN_F16::~Inst_VOP3__V_SIN_F16()
27504 {
27505 } // ~Inst_VOP3__V_SIN_F16
27506
27507 // D.f16 = sin(S0.f16 * 2 * PI).
27508 void
27509 Inst_VOP3__V_SIN_F16::execute(GPUDynInstPtr gpuDynInst)
27510 {
27511 panicUnimplemented();
27512 }
27513
27514 Inst_VOP3__V_COS_F16::Inst_VOP3__V_COS_F16(InFmt_VOP3 *iFmt)
27515 : Inst_VOP3(iFmt, "v_cos_f16", false)
27516 {
27517 setFlag(ALU);
27518 setFlag(F16);
27519 } // Inst_VOP3__V_COS_F16
27520
27521 Inst_VOP3__V_COS_F16::~Inst_VOP3__V_COS_F16()
27522 {
27523 } // ~Inst_VOP3__V_COS_F16
27524
27525 // D.f16 = cos(S0.f16 * 2 * PI).
27526 void
27527 Inst_VOP3__V_COS_F16::execute(GPUDynInstPtr gpuDynInst)
27528 {
27529 panicUnimplemented();
27530 }
27531
27532 Inst_VOP3__V_EXP_LEGACY_F32::Inst_VOP3__V_EXP_LEGACY_F32(InFmt_VOP3 *iFmt)
27533 : Inst_VOP3(iFmt, "v_exp_legacy_f32", false)
27534 {
27535 setFlag(ALU);
27536 setFlag(F32);
27537 } // Inst_VOP3__V_EXP_LEGACY_F32
27538
27539 Inst_VOP3__V_EXP_LEGACY_F32::~Inst_VOP3__V_EXP_LEGACY_F32()
27540 {
27541 } // ~Inst_VOP3__V_EXP_LEGACY_F32
27542
27543 // D.f = pow(2.0, S0.f).
27544 void
27545 Inst_VOP3__V_EXP_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
27546 {
27547 Wavefront *wf = gpuDynInst->wavefront();
27548 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27549 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27550
27551 src.readSrc();
27552
27553 if (instData.ABS & 0x1) {
27554 src.absModifier();
27555 }
27556
27557 if (extData.NEG & 0x1) {
27558 src.negModifier();
27559 }
27560
27561 /**
27562 * input modifiers are supported by FP operations only
27563 */
27564 assert(!(instData.ABS & 0x2));
27565 assert(!(instData.ABS & 0x4));
27566 assert(!(extData.NEG & 0x2));
27567 assert(!(extData.NEG & 0x4));
27568
27569 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27570 if (wf->execMask(lane)) {
27571 vdst[lane] = std::pow(2.0, src[lane]);
27572 }
27573 }
27574
27575 vdst.write();
27576 }
27577
27578 Inst_VOP3__V_LOG_LEGACY_F32::Inst_VOP3__V_LOG_LEGACY_F32(InFmt_VOP3 *iFmt)
27579 : Inst_VOP3(iFmt, "v_log_legacy_f32", false)
27580 {
27581 setFlag(ALU);
27582 setFlag(F32);
27583 } // Inst_VOP3__V_LOG_LEGACY_F32
27584
27585 Inst_VOP3__V_LOG_LEGACY_F32::~Inst_VOP3__V_LOG_LEGACY_F32()
27586 {
27587 } // ~Inst_VOP3__V_LOG_LEGACY_F32
27588
27589 // D.f = log2(S0.f).
27590 void
27591 Inst_VOP3__V_LOG_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
27592 {
27593 Wavefront *wf = gpuDynInst->wavefront();
27594 ConstVecOperandF32 src(gpuDynInst, extData.SRC0);
27595 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27596
27597 src.readSrc();
27598
27599 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27600 if (wf->execMask(lane)) {
27601 vdst[lane] = std::log2(src[lane]);
27602 }
27603 }
27604
27605 vdst.write();
27606 }
27607
27608 Inst_VOP3__V_MAD_LEGACY_F32::Inst_VOP3__V_MAD_LEGACY_F32(InFmt_VOP3 *iFmt)
27609 : Inst_VOP3(iFmt, "v_mad_legacy_f32", false)
27610 {
27611 setFlag(ALU);
27612 setFlag(F32);
27613 setFlag(MAD);
27614 } // Inst_VOP3__V_MAD_LEGACY_F32
27615
27616 Inst_VOP3__V_MAD_LEGACY_F32::~Inst_VOP3__V_MAD_LEGACY_F32()
27617 {
27618 } // ~Inst_VOP3__V_MAD_LEGACY_F32
27619
27620 // D.f = S0.f * S1.f + S2.f.
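// Note: on hardware the legacy variant follows DX9 multiply rules
// (0.0 * x = 0.0); the std::fma() below does not model that special
// case.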
27621 void
27622 Inst_VOP3__V_MAD_LEGACY_F32::execute(GPUDynInstPtr gpuDynInst)
27623 {
27624 Wavefront *wf = gpuDynInst->wavefront();
27625 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
27626 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
27627 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
27628 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27629
27630 src0.readSrc();
27631 src1.readSrc();
27632 src2.readSrc();
27633
27634 if (instData.ABS & 0x1) {
27635 src0.absModifier();
27636 }
27637
27638 if (instData.ABS & 0x2) {
27639 src1.absModifier();
27640 }
27641
27642 if (instData.ABS & 0x4) {
27643 src2.absModifier();
27644 }
27645
27646 if (extData.NEG & 0x1) {
27647 src0.negModifier();
27648 }
27649
27650 if (extData.NEG & 0x2) {
27651 src1.negModifier();
27652 }
27653
27654 if (extData.NEG & 0x4) {
27655 src2.negModifier();
27656 }
27657
27658 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27659 if (wf->execMask(lane)) {
27660 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
27661 }
27662 }
27663
27664 vdst.write();
27665 }
27666
27667 Inst_VOP3__V_MAD_F32::Inst_VOP3__V_MAD_F32(InFmt_VOP3 *iFmt)
27668 : Inst_VOP3(iFmt, "v_mad_f32", false)
27669 {
27670 setFlag(ALU);
27671 setFlag(F32);
27672 setFlag(MAD);
27673 } // Inst_VOP3__V_MAD_F32
27674
27675 Inst_VOP3__V_MAD_F32::~Inst_VOP3__V_MAD_F32()
27676 {
27677 } // ~Inst_VOP3__V_MAD_F32
27678
27679 // D.f = S0.f * S1.f + S2.f.
27680 void
27681 Inst_VOP3__V_MAD_F32::execute(GPUDynInstPtr gpuDynInst)
27682 {
27683 Wavefront *wf = gpuDynInst->wavefront();
27684 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
27685 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
27686 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
27687 VecOperandF32 vdst(gpuDynInst, instData.VDST);
27688
27689 src0.readSrc();
27690 src1.readSrc();
27691 src2.readSrc();
27692
27693 if (instData.ABS & 0x1) {
27694 src0.absModifier();
27695 }
27696
27697 if (instData.ABS & 0x2) {
27698 src1.absModifier();
27699 }
27700
27701 if (instData.ABS & 0x4) {
27702 src2.absModifier();
27703 }
27704
27705 if (extData.NEG & 0x1) {
27706 src0.negModifier();
27707 }
27708
27709 if (extData.NEG & 0x2) {
27710 src1.negModifier();
27711 }
27712
27713 if (extData.NEG & 0x4) {
27714 src2.negModifier();
27715 }
27716
27717 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27718 if (wf->execMask(lane)) {
27719 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
27720 }
27721 }
27722
27723 vdst.write();
27724 }
27725
27726 Inst_VOP3__V_MAD_I32_I24::Inst_VOP3__V_MAD_I32_I24(InFmt_VOP3 *iFmt)
27727 : Inst_VOP3(iFmt, "v_mad_i32_i24", false)
27728 {
27729 setFlag(ALU);
27730 setFlag(MAD);
27731 } // Inst_VOP3__V_MAD_I32_I24
27732
27733 Inst_VOP3__V_MAD_I32_I24::~Inst_VOP3__V_MAD_I32_I24()
27734 {
27735 } // ~Inst_VOP3__V_MAD_I32_I24
27736
27737 // D.i = S0.i[23:0] * S1.i[23:0] + S2.i.
27738 void
27739 Inst_VOP3__V_MAD_I32_I24::execute(GPUDynInstPtr gpuDynInst)
27740 {
27741 Wavefront *wf = gpuDynInst->wavefront();
27742 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
27743 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
27744 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
27745 VecOperandI32 vdst(gpuDynInst, instData.VDST);
27746
27747 src0.readSrc();
27748 src1.readSrc();
27749 src2.readSrc();
27750
27751 /**
27752 * input modifiers are supported by FP operations only
27753 */
27754 assert(!(instData.ABS & 0x1));
27755 assert(!(instData.ABS & 0x2));
27756 assert(!(instData.ABS & 0x4));
27757 assert(!(extData.NEG & 0x1));
27758 assert(!(extData.NEG & 0x2));
27759 assert(!(extData.NEG & 0x4));
27760
27761 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27762 if (wf->execMask(lane)) {
27763 vdst[lane] = sext<24>(bits(src0[lane], 23, 0))
27764 * sext<24>(bits(src1[lane], 23, 0)) + src2[lane];
27765 }
27766 }
27767
27768 vdst.write();
27769 }
27770
27771 Inst_VOP3__V_MAD_U32_U24::Inst_VOP3__V_MAD_U32_U24(InFmt_VOP3 *iFmt)
27772 : Inst_VOP3(iFmt, "v_mad_u32_u24", false)
27773 {
27774 setFlag(ALU);
27775 setFlag(MAD);
27776 } // Inst_VOP3__V_MAD_U32_U24
27777
27778 Inst_VOP3__V_MAD_U32_U24::~Inst_VOP3__V_MAD_U32_U24()
27779 {
27780 } // ~Inst_VOP3__V_MAD_U32_U24
27781
27782 // D.u = S0.u[23:0] * S1.u[23:0] + S2.u.
27783 void
27784 Inst_VOP3__V_MAD_U32_U24::execute(GPUDynInstPtr gpuDynInst)
27785 {
27786 Wavefront *wf = gpuDynInst->wavefront();
27787 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
27788 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27789 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27790 VecOperandU32 vdst(gpuDynInst, instData.VDST);
27791
27792 src0.readSrc();
27793 src1.readSrc();
27794 src2.readSrc();
27795
27796 /**
27797 * input modifiers are supported by FP operations only
27798 */
27799 assert(!(instData.ABS & 0x1));
27800 assert(!(instData.ABS & 0x2));
27801 assert(!(instData.ABS & 0x4));
27802 assert(!(extData.NEG & 0x1));
27803 assert(!(extData.NEG & 0x2));
27804 assert(!(extData.NEG & 0x4));
27805
27806 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27807 if (wf->execMask(lane)) {
27808 vdst[lane] = bits(src0[lane], 23, 0) * bits(src1[lane], 23, 0)
27809 + src2[lane];
27810 }
27811 }
27812
27813 vdst.write();
27814 }
27815
27816 Inst_VOP3__V_CUBEID_F32::Inst_VOP3__V_CUBEID_F32(InFmt_VOP3 *iFmt)
27817 : Inst_VOP3(iFmt, "v_cubeid_f32", false)
27818 {
27819 setFlag(ALU);
27820 setFlag(F32);
27821 } // Inst_VOP3__V_CUBEID_F32
27822
27823 Inst_VOP3__V_CUBEID_F32::~Inst_VOP3__V_CUBEID_F32()
27824 {
27825 } // ~Inst_VOP3__V_CUBEID_F32
27826
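// D.f = cubemap face ID ({0.0, 1.0, ..., 5.0}). XYZ coordinate is
// given in (S0.f, S1.f, S2.f).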
27827 void
27828 Inst_VOP3__V_CUBEID_F32::execute(GPUDynInstPtr gpuDynInst)
27829 {
27830 panicUnimplemented();
27831 }
27832
27833 Inst_VOP3__V_CUBESC_F32::Inst_VOP3__V_CUBESC_F32(InFmt_VOP3 *iFmt)
27834 : Inst_VOP3(iFmt, "v_cubesc_f32", false)
27835 {
27836 setFlag(ALU);
27837 setFlag(F32);
27838 } // Inst_VOP3__V_CUBESC_F32
27839
27840 Inst_VOP3__V_CUBESC_F32::~Inst_VOP3__V_CUBESC_F32()
27841 {
27842 } // ~Inst_VOP3__V_CUBESC_F32
27843
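// D.f = cubemap S coordinate. XYZ coordinate is given in
// (S0.f, S1.f, S2.f).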
27844 void
27845 Inst_VOP3__V_CUBESC_F32::execute(GPUDynInstPtr gpuDynInst)
27846 {
27847 panicUnimplemented();
27848 }
27849
27850 Inst_VOP3__V_CUBETC_F32::Inst_VOP3__V_CUBETC_F32(InFmt_VOP3 *iFmt)
27851 : Inst_VOP3(iFmt, "v_cubetc_f32", false)
27852 {
27853 setFlag(ALU);
27854 setFlag(F32);
27855 } // Inst_VOP3__V_CUBETC_F32
27856
27857 Inst_VOP3__V_CUBETC_F32::~Inst_VOP3__V_CUBETC_F32()
27858 {
27859 } // ~Inst_VOP3__V_CUBETC_F32
27860
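// D.f = cubemap T coordinate. XYZ coordinate is given in
// (S0.f, S1.f, S2.f).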
27861 void
27862 Inst_VOP3__V_CUBETC_F32::execute(GPUDynInstPtr gpuDynInst)
27863 {
27864 panicUnimplemented();
27865 }
27866
27867 Inst_VOP3__V_CUBEMA_F32::Inst_VOP3__V_CUBEMA_F32(InFmt_VOP3 *iFmt)
27868 : Inst_VOP3(iFmt, "v_cubema_f32", false)
27869 {
27870 setFlag(ALU);
27871 setFlag(F32);
27872 } // Inst_VOP3__V_CUBEMA_F32
27873
27874 Inst_VOP3__V_CUBEMA_F32::~Inst_VOP3__V_CUBEMA_F32()
27875 {
27876 } // ~Inst_VOP3__V_CUBEMA_F32
27877
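// D.f = 2.0 * cubemap major axis. XYZ coordinate is given in
// (S0.f, S1.f, S2.f).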
27878 void
27879 Inst_VOP3__V_CUBEMA_F32::execute(GPUDynInstPtr gpuDynInst)
27880 {
27881 panicUnimplemented();
27882 }
27883
27884 Inst_VOP3__V_BFE_U32::Inst_VOP3__V_BFE_U32(InFmt_VOP3 *iFmt)
27885 : Inst_VOP3(iFmt, "v_bfe_u32", false)
27886 {
27887 setFlag(ALU);
27888 } // Inst_VOP3__V_BFE_U32
27889
27890 Inst_VOP3__V_BFE_U32::~Inst_VOP3__V_BFE_U32()
27891 {
27892 } // ~Inst_VOP3__V_BFE_U32
27893
27894 // D.u = (S0.u >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
27895 // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
27896 void
27897 Inst_VOP3__V_BFE_U32::execute(GPUDynInstPtr gpuDynInst)
27898 {
27899 Wavefront *wf = gpuDynInst->wavefront();
27900 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
27901 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27902 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27903 VecOperandU32 vdst(gpuDynInst, instData.VDST);
27904
27905 src0.readSrc();
27906 src1.readSrc();
27907 src2.readSrc();
27908
27909 /**
27910 * input modifiers are supported by FP operations only
27911 */
27912 assert(!(instData.ABS & 0x1));
27913 assert(!(instData.ABS & 0x2));
27914 assert(!(instData.ABS & 0x4));
27915 assert(!(extData.NEG & 0x1));
27916 assert(!(extData.NEG & 0x2));
27917 assert(!(extData.NEG & 0x4));
27918
27919 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27920 if (wf->execMask(lane)) {
27921 vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
27922 & ((1U << bits(src2[lane], 4, 0)) - 1);
27923 }
27924 }
27925
27926 vdst.write();
27927 }
27928
27929 Inst_VOP3__V_BFE_I32::Inst_VOP3__V_BFE_I32(InFmt_VOP3 *iFmt)
27930 : Inst_VOP3(iFmt, "v_bfe_i32", false)
27931 {
27932 setFlag(ALU);
27933 } // Inst_VOP3__V_BFE_I32
27934
27935 Inst_VOP3__V_BFE_I32::~Inst_VOP3__V_BFE_I32()
27936 {
27937 } // ~Inst_VOP3__V_BFE_I32
27938
27939 // D.i = (S0.i >> S1.u[4:0]) & ((1 << S2.u[4:0]) - 1).
27940 // Bitfield extract with S0 = data, S1 = field_offset, S2 = field_width.
27941 void
27942 Inst_VOP3__V_BFE_I32::execute(GPUDynInstPtr gpuDynInst)
27943 {
27944 Wavefront *wf = gpuDynInst->wavefront();
27945 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
27946 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27947 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27948 VecOperandI32 vdst(gpuDynInst, instData.VDST);
27949
27950 src0.readSrc();
27951 src1.readSrc();
27952 src2.readSrc();
27953
27954 /**
27955 * input modifiers are supported by FP operations only
27956 */
27957 assert(!(instData.ABS & 0x1));
27958 assert(!(instData.ABS & 0x2));
27959 assert(!(instData.ABS & 0x4));
27960 assert(!(extData.NEG & 0x1));
27961 assert(!(extData.NEG & 0x2));
27962 assert(!(extData.NEG & 0x4));
27963
27964 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
27965 if (wf->execMask(lane)) {
27966 vdst[lane] = (src0[lane] >> bits(src1[lane], 4, 0))
27967 & ((1U << bits(src2[lane], 4, 0)) - 1);
27968 }
27969 }
27970
27971 vdst.write();
27972 }
27973
27974 Inst_VOP3__V_BFI_B32::Inst_VOP3__V_BFI_B32(InFmt_VOP3 *iFmt)
27975 : Inst_VOP3(iFmt, "v_bfi_b32", false)
27976 {
27977 setFlag(ALU);
27978 } // Inst_VOP3__V_BFI_B32
27979
27980 Inst_VOP3__V_BFI_B32::~Inst_VOP3__V_BFI_B32()
27981 {
27982 } // ~Inst_VOP3__V_BFI_B32
27983
27984 // D.u = (S0.u & S1.u) | (~S0.u & S2.u); bitfield insert.
27985 void
27986 Inst_VOP3__V_BFI_B32::execute(GPUDynInstPtr gpuDynInst)
27987 {
27988 Wavefront *wf = gpuDynInst->wavefront();
27989 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
27990 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
27991 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
27992 VecOperandU32 vdst(gpuDynInst, instData.VDST);
27993
27994 src0.readSrc();
27995 src1.readSrc();
27996 src2.readSrc();
27997
27998 /**
27999 * input modifiers are supported by FP operations only
28000 */
28001 assert(!(instData.ABS & 0x1));
28002 assert(!(instData.ABS & 0x2));
28003 assert(!(instData.ABS & 0x4));
28004 assert(!(extData.NEG & 0x1));
28005 assert(!(extData.NEG & 0x2));
28006 assert(!(extData.NEG & 0x4));
28007
28008 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28009 if (wf->execMask(lane)) {
28010 vdst[lane] = (src0[lane] & src1[lane]) | (~src0[lane]
28011 & src2[lane]);
28012 }
28013 }
28014
28015 vdst.write();
28016 }
28017
28018 Inst_VOP3__V_FMA_F32::Inst_VOP3__V_FMA_F32(InFmt_VOP3 *iFmt)
28019 : Inst_VOP3(iFmt, "v_fma_f32", false)
28020 {
28021 setFlag(ALU);
28022 setFlag(F32);
28023 setFlag(FMA);
28024 } // Inst_VOP3__V_FMA_F32
28025
28026 Inst_VOP3__V_FMA_F32::~Inst_VOP3__V_FMA_F32()
28027 {
28028 } // ~Inst_VOP3__V_FMA_F32
28029
28030 // D.f = S0.f * S1.f + S2.f.
28031 void
28032 Inst_VOP3__V_FMA_F32::execute(GPUDynInstPtr gpuDynInst)
28033 {
28034 Wavefront *wf = gpuDynInst->wavefront();
28035 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28036 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28037 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28038 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28039
28040 src0.readSrc();
28041 src1.readSrc();
28042 src2.readSrc();
28043
28044 if (instData.ABS & 0x1) {
28045 src0.absModifier();
28046 }
28047
28048 if (instData.ABS & 0x2) {
28049 src1.absModifier();
28050 }
28051
28052 if (instData.ABS & 0x4) {
28053 src2.absModifier();
28054 }
28055
28056 if (extData.NEG & 0x1) {
28057 src0.negModifier();
28058 }
28059
28060 if (extData.NEG & 0x2) {
28061 src1.negModifier();
28062 }
28063
28064 if (extData.NEG & 0x4) {
28065 src2.negModifier();
28066 }
28067
28068 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28069 if (wf->execMask(lane)) {
28070 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
28071 }
28072 }
28073
28074 vdst.write();
28075 }
28076
28077 Inst_VOP3__V_FMA_F64::Inst_VOP3__V_FMA_F64(InFmt_VOP3 *iFmt)
28078 : Inst_VOP3(iFmt, "v_fma_f64", false)
28079 {
28080 setFlag(ALU);
28081 setFlag(F64);
28082 setFlag(FMA);
28083 } // Inst_VOP3__V_FMA_F64
28084
28085 Inst_VOP3__V_FMA_F64::~Inst_VOP3__V_FMA_F64()
28086 {
28087 } // ~Inst_VOP3__V_FMA_F64
28088
28089 // D.d = S0.d * S1.d + S2.d.
28090 void
28091 Inst_VOP3__V_FMA_F64::execute(GPUDynInstPtr gpuDynInst)
28092 {
28093 Wavefront *wf = gpuDynInst->wavefront();
28094 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
28095 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
28096 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
28097 VecOperandF64 vdst(gpuDynInst, instData.VDST);
28098
28099 src0.readSrc();
28100 src1.readSrc();
28101 src2.readSrc();
28102
28103 if (instData.ABS & 0x1) {
28104 src0.absModifier();
28105 }
28106
28107 if (instData.ABS & 0x2) {
28108 src1.absModifier();
28109 }
28110
28111 if (instData.ABS & 0x4) {
28112 src2.absModifier();
28113 }
28114
28115 if (extData.NEG & 0x1) {
28116 src0.negModifier();
28117 }
28118
28119 if (extData.NEG & 0x2) {
28120 src1.negModifier();
28121 }
28122
28123 if (extData.NEG & 0x4) {
28124 src2.negModifier();
28125 }
28126
28127 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28128 if (wf->execMask(lane)) {
28129 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
28130 }
28131 }
28132
28133 vdst.write();
28134 }
28135
28136 Inst_VOP3__V_LERP_U8::Inst_VOP3__V_LERP_U8(InFmt_VOP3 *iFmt)
28137 : Inst_VOP3(iFmt, "v_lerp_u8", false)
28138 {
28139 setFlag(ALU);
28140 } // Inst_VOP3__V_LERP_U8
28141
28142 Inst_VOP3__V_LERP_U8::~Inst_VOP3__V_LERP_U8()
28143 {
28144 } // ~Inst_VOP3__V_LERP_U8
28145
28146 // D.u = ((S0.u[31:24] + S1.u[31:24] + S2.u[24]) >> 1) << 24
28147 // D.u += ((S0.u[23:16] + S1.u[23:16] + S2.u[16]) >> 1) << 16;
28148 // D.u += ((S0.u[15:8] + S1.u[15:8] + S2.u[8]) >> 1) << 8;
28149 // D.u += ((S0.u[7:0] + S1.u[7:0] + S2.u[0]) >> 1).
28150 void
28151 Inst_VOP3__V_LERP_U8::execute(GPUDynInstPtr gpuDynInst)
28152 {
28153 Wavefront *wf = gpuDynInst->wavefront();
28154 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28155 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28156 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28157 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28158
28159 src0.readSrc();
28160 src1.readSrc();
28161 src2.readSrc();
28162
28163 /**
28164 * input modifiers are supported by FP operations only
28165 */
28166 assert(!(instData.ABS & 0x1));
28167 assert(!(instData.ABS & 0x2));
28168 assert(!(instData.ABS & 0x4));
28169 assert(!(extData.NEG & 0x1));
28170 assert(!(extData.NEG & 0x2));
28171 assert(!(extData.NEG & 0x4));
28172
28173 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28174 if (wf->execMask(lane)) {
28175 vdst[lane] = ((bits(src0[lane], 31, 24)
28176 + bits(src1[lane], 31, 24) + bits(src2[lane], 24)) >> 1)
28177 << 24;
28178 vdst[lane] += ((bits(src0[lane], 23, 16)
28179 + bits(src1[lane], 23, 16) + bits(src2[lane], 16)) >> 1)
28180 << 16;
28181 vdst[lane] += ((bits(src0[lane], 15, 8)
28182 + bits(src1[lane], 15, 8) + bits(src2[lane], 8)) >> 1)
28183 << 8;
28184 vdst[lane] += ((bits(src0[lane], 7, 0) + bits(src1[lane], 7, 0)
28185 + bits(src2[lane], 0)) >> 1);
28186 }
28187 }
28188
28189 vdst.write();
28190 }
28191
28192 Inst_VOP3__V_ALIGNBIT_B32::Inst_VOP3__V_ALIGNBIT_B32(InFmt_VOP3 *iFmt)
28193 : Inst_VOP3(iFmt, "v_alignbit_b32", false)
28194 {
28195 setFlag(ALU);
28196 } // Inst_VOP3__V_ALIGNBIT_B32
28197
28198 Inst_VOP3__V_ALIGNBIT_B32::~Inst_VOP3__V_ALIGNBIT_B32()
28199 {
28200 } // ~Inst_VOP3__V_ALIGNBIT_B32
28201
28202 // D.u = ({S0, S1} >> S2.u[4:0]) & 0xffffffff.
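// This is a funnel shift: the two sources are concatenated into a 64-bit
// value {S0, S1} and shifted right by S2.u[4:0] bits. For example
// (illustrative values), S0 = 0x00000001, S1 = 0x00000000 and a shift of
// 4 yield 0x0000000100000000 >> 4, so D = 0x10000000.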
28203 void
28204 Inst_VOP3__V_ALIGNBIT_B32::execute(GPUDynInstPtr gpuDynInst)
28205 {
28206 Wavefront *wf = gpuDynInst->wavefront();
28207 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28208 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28209 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28210 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28211
28212 src0.readSrc();
28213 src1.readSrc();
28214 src2.readSrc();
28215
28216 /**
28217 * input modifiers are supported by FP operations only
28218 */
28219 assert(!(instData.ABS & 0x1));
28220 assert(!(instData.ABS & 0x2));
28221 assert(!(instData.ABS & 0x4));
28222 assert(!(extData.NEG & 0x1));
28223 assert(!(extData.NEG & 0x2));
28224 assert(!(extData.NEG & 0x4));
28225
28226 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28227 if (wf->execMask(lane)) {
28228 VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
28229 | (VecElemU64)src1[lane]);
28230 vdst[lane] = (VecElemU32)((src_0_1
28231 >> (VecElemU64)bits(src2[lane], 4, 0)) & 0xffffffff);
28232 }
28233 }
28234
28235 vdst.write();
28236 }
28237
28238 Inst_VOP3__V_ALIGNBYTE_B32::Inst_VOP3__V_ALIGNBYTE_B32(InFmt_VOP3 *iFmt)
28239 : Inst_VOP3(iFmt, "v_alignbyte_b32", false)
28240 {
28241 setFlag(ALU);
28242 } // Inst_VOP3__V_ALIGNBYTE_B32
28243
28244 Inst_VOP3__V_ALIGNBYTE_B32::~Inst_VOP3__V_ALIGNBYTE_B32()
28245 {
28246 } // ~Inst_VOP3__V_ALIGNBYTE_B32
28247
28248 // D.u = ({S0, S1} >> (8 * S2.u[4:0])) & 0xffffffff.
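// Same funnel shift as v_alignbit_b32, but at byte granularity. For
// example (illustrative values), S0 = 0x11223344, S1 = 0x55667788 and
// S2.u[4:0] = 2 shift {S0, S1} right by 16 bits, so D = 0x33445566.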
28249 void
28250 Inst_VOP3__V_ALIGNBYTE_B32::execute(GPUDynInstPtr gpuDynInst)
28251 {
28252 Wavefront *wf = gpuDynInst->wavefront();
28253 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28254 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28255 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28256 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28257
28258 src0.readSrc();
28259 src1.readSrc();
28260 src2.readSrc();
28261
28262 /**
28263 * input modifiers are supported by FP operations only
28264 */
28265 assert(!(instData.ABS & 0x1));
28266 assert(!(instData.ABS & 0x2));
28267 assert(!(instData.ABS & 0x4));
28268 assert(!(extData.NEG & 0x1));
28269 assert(!(extData.NEG & 0x2));
28270 assert(!(extData.NEG & 0x4));
28271
28272 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28273 if (wf->execMask(lane)) {
28274 VecElemU64 src_0_1 = (((VecElemU64)src0[lane] << 32)
28275 | (VecElemU64)src1[lane]);
28276 vdst[lane] = (VecElemU32)((src_0_1
28277 >> (8ULL * (VecElemU64)bits(src2[lane], 4, 0)))
28278 & 0xffffffff);
28279 }
28280 }
28281
28282 vdst.write();
28283 }
28284
28285 Inst_VOP3__V_MIN3_F32::Inst_VOP3__V_MIN3_F32(InFmt_VOP3 *iFmt)
28286 : Inst_VOP3(iFmt, "v_min3_f32", false)
28287 {
28288 setFlag(ALU);
28289 setFlag(F32);
28290 } // Inst_VOP3__V_MIN3_F32
28291
28292 Inst_VOP3__V_MIN3_F32::~Inst_VOP3__V_MIN3_F32()
28293 {
28294 } // ~Inst_VOP3__V_MIN3_F32
28295
28296 // D.f = min(S0.f, S1.f, S2.f).
28297 void
28298 Inst_VOP3__V_MIN3_F32::execute(GPUDynInstPtr gpuDynInst)
28299 {
28300 Wavefront *wf = gpuDynInst->wavefront();
28301 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28302 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28303 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28304 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28305
28306 src0.readSrc();
28307 src1.readSrc();
28308 src2.readSrc();
28309
28310 if (instData.ABS & 0x1) {
28311 src0.absModifier();
28312 }
28313
28314 if (instData.ABS & 0x2) {
28315 src1.absModifier();
28316 }
28317
28318 if (instData.ABS & 0x4) {
28319 src2.absModifier();
28320 }
28321
28322 if (extData.NEG & 0x1) {
28323 src0.negModifier();
28324 }
28325
28326 if (extData.NEG & 0x2) {
28327 src1.negModifier();
28328 }
28329
28330 if (extData.NEG & 0x4) {
28331 src2.negModifier();
28332 }
28333
28334 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28335 if (wf->execMask(lane)) {
28336 VecElemF32 min_0_1 = std::fmin(src0[lane], src1[lane]);
28337 vdst[lane] = std::fmin(min_0_1, src2[lane]);
28338 }
28339 }
28340
28341 vdst.write();
28342 }
28343
28344 Inst_VOP3__V_MIN3_I32::Inst_VOP3__V_MIN3_I32(InFmt_VOP3 *iFmt)
28345 : Inst_VOP3(iFmt, "v_min3_i32", false)
28346 {
28347 setFlag(ALU);
28348 } // Inst_VOP3__V_MIN3_I32
28349
28350 Inst_VOP3__V_MIN3_I32::~Inst_VOP3__V_MIN3_I32()
28351 {
28352 } // ~Inst_VOP3__V_MIN3_I32
28353
28354 // D.i = min(S0.i, S1.i, S2.i).
28355 void
28356 Inst_VOP3__V_MIN3_I32::execute(GPUDynInstPtr gpuDynInst)
28357 {
28358 Wavefront *wf = gpuDynInst->wavefront();
28359 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28360 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28361 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
28362 VecOperandI32 vdst(gpuDynInst, instData.VDST);
28363
28364 src0.readSrc();
28365 src1.readSrc();
28366 src2.readSrc();
28367
28368 /**
28369 * input modifiers are supported by FP operations only
28370 */
28371 assert(!(instData.ABS & 0x1));
28372 assert(!(instData.ABS & 0x2));
28373 assert(!(instData.ABS & 0x4));
28374 assert(!(extData.NEG & 0x1));
28375 assert(!(extData.NEG & 0x2));
28376 assert(!(extData.NEG & 0x4));
28377
28378 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28379 if (wf->execMask(lane)) {
28380 VecElemI32 min_0_1 = std::min(src0[lane], src1[lane]);
28381 vdst[lane] = std::min(min_0_1, src2[lane]);
28382 }
28383 }
28384
28385 vdst.write();
28386 }
28387
28388 Inst_VOP3__V_MIN3_U32::Inst_VOP3__V_MIN3_U32(InFmt_VOP3 *iFmt)
28389 : Inst_VOP3(iFmt, "v_min3_u32", false)
28390 {
28391 setFlag(ALU);
28392 } // Inst_VOP3__V_MIN3_U32
28393
28394 Inst_VOP3__V_MIN3_U32::~Inst_VOP3__V_MIN3_U32()
28395 {
28396 } // ~Inst_VOP3__V_MIN3_U32
28397
28398 // D.u = min(S0.u, S1.u, S2.u).
28399 void
28400 Inst_VOP3__V_MIN3_U32::execute(GPUDynInstPtr gpuDynInst)
28401 {
28402 Wavefront *wf = gpuDynInst->wavefront();
28403 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28404 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28405 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28406 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28407
28408 src0.readSrc();
28409 src1.readSrc();
28410 src2.readSrc();
28411
28412 /**
28413 * input modifiers are supported by FP operations only
28414 */
28415 assert(!(instData.ABS & 0x1));
28416 assert(!(instData.ABS & 0x2));
28417 assert(!(instData.ABS & 0x4));
28418 assert(!(extData.NEG & 0x1));
28419 assert(!(extData.NEG & 0x2));
28420 assert(!(extData.NEG & 0x4));
28421
28422 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28423 if (wf->execMask(lane)) {
28424 VecElemU32 min_0_1 = std::min(src0[lane], src1[lane]);
28425 vdst[lane] = std::min(min_0_1, src2[lane]);
28426 }
28427 }
28428
28429 vdst.write();
28430 }
28431
28432 Inst_VOP3__V_MAX3_F32::Inst_VOP3__V_MAX3_F32(InFmt_VOP3 *iFmt)
28433 : Inst_VOP3(iFmt, "v_max3_f32", false)
28434 {
28435 setFlag(ALU);
28436 setFlag(F32);
28437 } // Inst_VOP3__V_MAX3_F32
28438
28439 Inst_VOP3__V_MAX3_F32::~Inst_VOP3__V_MAX3_F32()
28440 {
28441 } // ~Inst_VOP3__V_MAX3_F32
28442
28443 // D.f = max(S0.f, S1.f, S2.f).
28444 void
28445 Inst_VOP3__V_MAX3_F32::execute(GPUDynInstPtr gpuDynInst)
28446 {
28447 Wavefront *wf = gpuDynInst->wavefront();
28448 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28449 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28450 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28451 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28452
28453 src0.readSrc();
28454 src1.readSrc();
28455 src2.readSrc();
28456
28457 if (instData.ABS & 0x1) {
28458 src0.absModifier();
28459 }
28460
28461 if (instData.ABS & 0x2) {
28462 src1.absModifier();
28463 }
28464
28465 if (instData.ABS & 0x4) {
28466 src2.absModifier();
28467 }
28468
28469 if (extData.NEG & 0x1) {
28470 src0.negModifier();
28471 }
28472
28473 if (extData.NEG & 0x2) {
28474 src1.negModifier();
28475 }
28476
28477 if (extData.NEG & 0x4) {
28478 src2.negModifier();
28479 }
28480
28481 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28482 if (wf->execMask(lane)) {
28483 VecElemF32 max_0_1 = std::fmax(src0[lane], src1[lane]);
28484 vdst[lane] = std::fmax(max_0_1, src2[lane]);
28485 }
28486 }
28487
28488 vdst.write();
28489 }
28490
28491 Inst_VOP3__V_MAX3_I32::Inst_VOP3__V_MAX3_I32(InFmt_VOP3 *iFmt)
28492 : Inst_VOP3(iFmt, "v_max3_i32", false)
28493 {
28494 setFlag(ALU);
28495 } // Inst_VOP3__V_MAX3_I32
28496
28497 Inst_VOP3__V_MAX3_I32::~Inst_VOP3__V_MAX3_I32()
28498 {
28499 } // ~Inst_VOP3__V_MAX3_I32
28500
28501 // D.i = max(S0.i, S1.i, S2.i).
28502 void
28503 Inst_VOP3__V_MAX3_I32::execute(GPUDynInstPtr gpuDynInst)
28504 {
28505 Wavefront *wf = gpuDynInst->wavefront();
28506 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28507 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28508 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
28509 VecOperandI32 vdst(gpuDynInst, instData.VDST);
28510
28511 src0.readSrc();
28512 src1.readSrc();
28513 src2.readSrc();
28514
28515 /**
28516 * input modifiers are supported by FP operations only
28517 */
28518 assert(!(instData.ABS & 0x1));
28519 assert(!(instData.ABS & 0x2));
28520 assert(!(instData.ABS & 0x4));
28521 assert(!(extData.NEG & 0x1));
28522 assert(!(extData.NEG & 0x2));
28523 assert(!(extData.NEG & 0x4));
28524
28525 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28526 if (wf->execMask(lane)) {
28527 VecElemI32 max_0_1 = std::max(src0[lane], src1[lane]);
28528 vdst[lane] = std::max(max_0_1, src2[lane]);
28529 }
28530 }
28531
28532 vdst.write();
28533 }
28534
28535 Inst_VOP3__V_MAX3_U32::Inst_VOP3__V_MAX3_U32(InFmt_VOP3 *iFmt)
28536 : Inst_VOP3(iFmt, "v_max3_u32", false)
28537 {
28538 setFlag(ALU);
28539 } // Inst_VOP3__V_MAX3_U32
28540
28541 Inst_VOP3__V_MAX3_U32::~Inst_VOP3__V_MAX3_U32()
28542 {
28543 } // ~Inst_VOP3__V_MAX3_U32
28544
28545 // D.u = max(S0.u, S1.u, S2.u).
28546 void
28547 Inst_VOP3__V_MAX3_U32::execute(GPUDynInstPtr gpuDynInst)
28548 {
28549 Wavefront *wf = gpuDynInst->wavefront();
28550 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28551 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28552 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28553 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28554
28555 src0.readSrc();
28556 src1.readSrc();
28557 src2.readSrc();
28558
28559 /**
28560 * input modifiers are supported by FP operations only
28561 */
28562 assert(!(instData.ABS & 0x1));
28563 assert(!(instData.ABS & 0x2));
28564 assert(!(instData.ABS & 0x4));
28565 assert(!(extData.NEG & 0x1));
28566 assert(!(extData.NEG & 0x2));
28567 assert(!(extData.NEG & 0x4));
28568
28569 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28570 if (wf->execMask(lane)) {
28571 VecElemU32 max_0_1 = std::max(src0[lane], src1[lane]);
28572 vdst[lane] = std::max(max_0_1, src2[lane]);
28573 }
28574 }
28575
28576 vdst.write();
28577 }
28578
28579 Inst_VOP3__V_MED3_F32::Inst_VOP3__V_MED3_F32(InFmt_VOP3 *iFmt)
28580 : Inst_VOP3(iFmt, "v_med3_f32", false)
28581 {
28582 setFlag(ALU);
28583 setFlag(F32);
28584 } // Inst_VOP3__V_MED3_F32
28585
28586 Inst_VOP3__V_MED3_F32::~Inst_VOP3__V_MED3_F32()
28587 {
28588 } // ~Inst_VOP3__V_MED3_F32
28589
28590 // D.f = median(S0.f, S1.f, S2.f).
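// median() is a helper (see inst_util.hh) returning the middle of the
// three values, e.g. median(1.0f, 3.0f, 2.0f) == 2.0f; it is equivalent
// to max(min(a, b), min(max(a, b), c)).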
28591 void
28592 Inst_VOP3__V_MED3_F32::execute(GPUDynInstPtr gpuDynInst)
28593 {
28594 Wavefront *wf = gpuDynInst->wavefront();
28595 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28596 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28597 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28598 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28599
28600 src0.readSrc();
28601 src1.readSrc();
28602 src2.readSrc();
28603
28604 if (instData.ABS & 0x1) {
28605 src0.absModifier();
28606 }
28607
28608 if (instData.ABS & 0x2) {
28609 src1.absModifier();
28610 }
28611
28612 if (instData.ABS & 0x4) {
28613 src2.absModifier();
28614 }
28615
28616 if (extData.NEG & 0x1) {
28617 src0.negModifier();
28618 }
28619
28620 if (extData.NEG & 0x2) {
28621 src1.negModifier();
28622 }
28623
28624 if (extData.NEG & 0x4) {
28625 src2.negModifier();
28626 }
28627
28628 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28629 if (wf->execMask(lane)) {
28630 vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
28631 }
28632 }
28633
28634 vdst.write();
28635 }
28636
28637 Inst_VOP3__V_MED3_I32::Inst_VOP3__V_MED3_I32(InFmt_VOP3 *iFmt)
28638 : Inst_VOP3(iFmt, "v_med3_i32", false)
28639 {
28640 setFlag(ALU);
28641 } // Inst_VOP3__V_MED3_I32
28642
28643 Inst_VOP3__V_MED3_I32::~Inst_VOP3__V_MED3_I32()
28644 {
28645 } // ~Inst_VOP3__V_MED3_I32
28646
28647 // D.i = median(S0.i, S1.i, S2.i).
28648 void
28649 Inst_VOP3__V_MED3_I32::execute(GPUDynInstPtr gpuDynInst)
28650 {
28651 Wavefront *wf = gpuDynInst->wavefront();
28652 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28653 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28654 ConstVecOperandI32 src2(gpuDynInst, extData.SRC2);
28655 VecOperandI32 vdst(gpuDynInst, instData.VDST);
28656
28657 src0.readSrc();
28658 src1.readSrc();
28659 src2.readSrc();
28660
28661 /**
28662 * input modifiers are supported by FP operations only
28663 */
28664 assert(!(instData.ABS & 0x1));
28665 assert(!(instData.ABS & 0x2));
28666 assert(!(instData.ABS & 0x4));
28667 assert(!(extData.NEG & 0x1));
28668 assert(!(extData.NEG & 0x2));
28669 assert(!(extData.NEG & 0x4));
28670
28671 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28672 if (wf->execMask(lane)) {
28673 vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
28674 }
28675 }
28676
28677 vdst.write();
28678 }
28679
28680 Inst_VOP3__V_MED3_U32::Inst_VOP3__V_MED3_U32(InFmt_VOP3 *iFmt)
28681 : Inst_VOP3(iFmt, "v_med3_u32", false)
28682 {
28683 setFlag(ALU);
28684 } // Inst_VOP3__V_MED3_U32
28685
28686 Inst_VOP3__V_MED3_U32::~Inst_VOP3__V_MED3_U32()
28687 {
28688 } // ~Inst_VOP3__V_MED3_U32
28689
28690 // D.u = median(S0.u, S1.u, S2.u).
28691 void
28692 Inst_VOP3__V_MED3_U32::execute(GPUDynInstPtr gpuDynInst)
28693 {
28694 Wavefront *wf = gpuDynInst->wavefront();
28695 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28696 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28697 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28698 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28699
28700 src0.readSrc();
28701 src1.readSrc();
28702 src2.readSrc();
28703
28704 /**
28705 * input modifiers are supported by FP operations only
28706 */
28707 assert(!(instData.ABS & 0x1));
28708 assert(!(instData.ABS & 0x2));
28709 assert(!(instData.ABS & 0x4));
28710 assert(!(extData.NEG & 0x1));
28711 assert(!(extData.NEG & 0x2));
28712 assert(!(extData.NEG & 0x4));
28713
28714 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28715 if (wf->execMask(lane)) {
28716 vdst[lane] = median(src0[lane], src1[lane], src2[lane]);
28717 }
28718 }
28719
28720 vdst.write();
28721 }
28722
28723 Inst_VOP3__V_SAD_U8::Inst_VOP3__V_SAD_U8(InFmt_VOP3 *iFmt)
28724 : Inst_VOP3(iFmt, "v_sad_u8", false)
28725 {
28726 setFlag(ALU);
28727 } // Inst_VOP3__V_SAD_U8
28728
28729 Inst_VOP3__V_SAD_U8::~Inst_VOP3__V_SAD_U8()
28730 {
28731 } // ~Inst_VOP3__V_SAD_U8
28732
28733 // D.u = abs(S0.i[31:24] - S1.i[31:24]) + abs(S0.i[23:16] - S1.i[23:16]) +
28734 // abs(S0.i[15:8] - S1.i[15:8]) + abs(S0.i[7:0] - S1.i[7:0]) + S2.u.
28735 // Sum of absolute differences with accumulation, overflow into upper bits
28736 // is allowed.
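// Worked example (illustrative values): S0 = 0x01020304, S1 = 0x04030201
// and S2 = 0 give |1-4| + |2-3| + |3-2| + |4-1| = 8, so D = 8.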
28737 void
28738 Inst_VOP3__V_SAD_U8::execute(GPUDynInstPtr gpuDynInst)
28739 {
28740 Wavefront *wf = gpuDynInst->wavefront();
28741 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28742 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28743 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28744 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28745
28746 src0.readSrc();
28747 src1.readSrc();
28748 src2.readSrc();
28749
28750 /**
28751 * input modifiers are supported by FP operations only
28752 */
28753 assert(!(instData.ABS & 0x1));
28754 assert(!(instData.ABS & 0x2));
28755 assert(!(instData.ABS & 0x4));
28756 assert(!(extData.NEG & 0x1));
28757 assert(!(extData.NEG & 0x2));
28758 assert(!(extData.NEG & 0x4));
28759
28760 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28761 if (wf->execMask(lane)) {
28762 vdst[lane] = std::abs(bits(src0[lane], 31, 24)
28763 - bits(src1[lane], 31, 24))
28764 + std::abs(bits(src0[lane], 23, 16)
28765 - bits(src1[lane], 23, 16))
28766 + std::abs(bits(src0[lane], 15, 8)
28767 - bits(src1[lane], 15, 8))
28768 + std::abs(bits(src0[lane], 7, 0)
28769 - bits(src1[lane], 7, 0)) + src2[lane];
28770 }
28771 }
28772
28773 vdst.write();
28774 }
28775
28776 Inst_VOP3__V_SAD_HI_U8::Inst_VOP3__V_SAD_HI_U8(InFmt_VOP3 *iFmt)
28777 : Inst_VOP3(iFmt, "v_sad_hi_u8", false)
28778 {
28779 setFlag(ALU);
28780 } // Inst_VOP3__V_SAD_HI_U8
28781
28782 Inst_VOP3__V_SAD_HI_U8::~Inst_VOP3__V_SAD_HI_U8()
28783 {
28784 } // ~Inst_VOP3__V_SAD_HI_U8
28785
28786 // D.u = (SAD_U8(S0, S1, 0) << 16) + S2.u.
28787 // Sum of absolute differences with accumulation, overflow is lost.
28788 void
28789 Inst_VOP3__V_SAD_HI_U8::execute(GPUDynInstPtr gpuDynInst)
28790 {
28791 Wavefront *wf = gpuDynInst->wavefront();
28792 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
28793 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28794 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28795 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28796
28797 src0.readSrc();
28798 src1.readSrc();
28799 src2.readSrc();
28800
28801 /**
28802 * input modifiers are supported by FP operations only
28803 */
28804 assert(!(instData.ABS & 0x1));
28805 assert(!(instData.ABS & 0x2));
28806 assert(!(instData.ABS & 0x4));
28807 assert(!(extData.NEG & 0x1));
28808 assert(!(extData.NEG & 0x2));
28809 assert(!(extData.NEG & 0x4));
28810
28811 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28812 if (wf->execMask(lane)) {
vdst[lane] = ((std::abs((VecElemI32)bits(src0[lane], 31, 24)
    - (VecElemI32)bits(src1[lane], 31, 24))
    + std::abs((VecElemI32)bits(src0[lane], 23, 16)
    - (VecElemI32)bits(src1[lane], 23, 16))
    + std::abs((VecElemI32)bits(src0[lane], 15, 8)
    - (VecElemI32)bits(src1[lane], 15, 8))
    + std::abs((VecElemI32)bits(src0[lane], 7, 0)
    - (VecElemI32)bits(src1[lane], 7, 0))) << 16)
    + src2[lane];
28818 }
28819 }
28820
28821 vdst.write();
28822 }
28823
28824 Inst_VOP3__V_SAD_U16::Inst_VOP3__V_SAD_U16(InFmt_VOP3 *iFmt)
28825 : Inst_VOP3(iFmt, "v_sad_u16", false)
28826 {
28827 setFlag(ALU);
28828 } // Inst_VOP3__V_SAD_U16
28829
28830 Inst_VOP3__V_SAD_U16::~Inst_VOP3__V_SAD_U16()
28831 {
28832 } // ~Inst_VOP3__V_SAD_U16
28833
28834 // D.u = abs(S0.i[31:16] - S1.i[31:16]) + abs(S0.i[15:0] - S1.i[15:0])
28835 // + S2.u.
28836 // Word SAD with accumulation.
28837 void
28838 Inst_VOP3__V_SAD_U16::execute(GPUDynInstPtr gpuDynInst)
28839 {
28840 Wavefront *wf = gpuDynInst->wavefront();
28841 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28842 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28843 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28844 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28845
28846 src0.readSrc();
28847 src1.readSrc();
28848 src2.readSrc();
28849
28850 /**
28851 * input modifiers are supported by FP operations only
28852 */
28853 assert(!(instData.ABS & 0x1));
28854 assert(!(instData.ABS & 0x2));
28855 assert(!(instData.ABS & 0x4));
28856 assert(!(extData.NEG & 0x1));
28857 assert(!(extData.NEG & 0x2));
28858 assert(!(extData.NEG & 0x4));
28859
28860 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28861 if (wf->execMask(lane)) {
28862 vdst[lane] = std::abs(bits(src0[lane], 31, 16)
28863 - bits(src1[lane], 31, 16))
28864 + std::abs(bits(src0[lane], 15, 0)
28865 - bits(src1[lane], 15, 0)) + src2[lane];
28866 }
28867 }
28868
28869 vdst.write();
28870 }
28871
28872 Inst_VOP3__V_SAD_U32::Inst_VOP3__V_SAD_U32(InFmt_VOP3 *iFmt)
28873 : Inst_VOP3(iFmt, "v_sad_u32", false)
28874 {
28875 setFlag(ALU);
28876 } // Inst_VOP3__V_SAD_U32
28877
28878 Inst_VOP3__V_SAD_U32::~Inst_VOP3__V_SAD_U32()
28879 {
28880 } // ~Inst_VOP3__V_SAD_U32
28881
28882 // D.u = abs(S0.i - S1.i) + S2.u.
28883 // Dword SAD with accumulation.
28884 void
28885 Inst_VOP3__V_SAD_U32::execute(GPUDynInstPtr gpuDynInst)
28886 {
28887 Wavefront *wf = gpuDynInst->wavefront();
28888 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
28889 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
28890 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28891 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28892
28893 src0.readSrc();
28894 src1.readSrc();
28895 src2.readSrc();
28896
28897 /**
28898 * input modifiers are supported by FP operations only
28899 */
28900 assert(!(instData.ABS & 0x1));
28901 assert(!(instData.ABS & 0x2));
28902 assert(!(instData.ABS & 0x4));
28903 assert(!(extData.NEG & 0x1));
28904 assert(!(extData.NEG & 0x2));
28905 assert(!(extData.NEG & 0x4));
28906
28907 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28908 if (wf->execMask(lane)) {
28909 vdst[lane] = std::abs(src0[lane] - src1[lane]) + src2[lane];
28910 }
28911 }
28912
28913 vdst.write();
28914 }
28915
28916 Inst_VOP3__V_CVT_PK_U8_F32::Inst_VOP3__V_CVT_PK_U8_F32(InFmt_VOP3 *iFmt)
28917 : Inst_VOP3(iFmt, "v_cvt_pk_u8_f32", false)
28918 {
28919 setFlag(ALU);
28920 setFlag(F32);
28921 } // Inst_VOP3__V_CVT_PK_U8_F32
28922
28923 Inst_VOP3__V_CVT_PK_U8_F32::~Inst_VOP3__V_CVT_PK_U8_F32()
28924 {
28925 } // ~Inst_VOP3__V_CVT_PK_U8_F32
28926
28927 // D.u = ((flt32_to_uint8(S0.f) & 0xff) << (8 * S1.u[1:0]))
28928 // | (S2.u & ~(0xff << (8 * S1.u[1:0]))).
28929 // Convert floating point value S0 to 8-bit unsigned integer and pack the
28930 // result into byte S1 of dword S2.
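// For example (illustrative values), S0 = 255.0f, S1.u[1:0] = 1 and
// S2 = 0x0 convert S0 to 0xff and place it in byte 1, so D = 0x0000ff00.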
28931 void
28932 Inst_VOP3__V_CVT_PK_U8_F32::execute(GPUDynInstPtr gpuDynInst)
28933 {
28934 Wavefront *wf = gpuDynInst->wavefront();
28935 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28936 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
28937 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
28938 VecOperandU32 vdst(gpuDynInst, instData.VDST);
28939
28940 src0.readSrc();
28941 src1.readSrc();
28942 src2.readSrc();
28943
28944 if (instData.ABS & 0x1) {
28945 src0.absModifier();
28946 }
28947
28949 if (extData.NEG & 0x1) {
28950 src0.negModifier();
28951 }
28952
28953 /**
28954 * input modifiers are supported by FP operations only
28955 */
28956 assert(!(instData.ABS & 0x2));
28957 assert(!(instData.ABS & 0x4));
28958 assert(!(extData.NEG & 0x2));
28959 assert(!(extData.NEG & 0x4));
28960
28961 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
28962 if (wf->execMask(lane)) {
28963 vdst[lane] = (((VecElemU8)src0[lane] & 0xff)
28964 << (8 * bits(src1[lane], 1, 0)))
28965 | (src2[lane] & ~(0xff << (8 * bits(src1[lane], 1, 0))));
28966 }
28967 }
28968
28969 vdst.write();
28970 }
28971
28972 Inst_VOP3__V_DIV_FIXUP_F32::Inst_VOP3__V_DIV_FIXUP_F32(InFmt_VOP3 *iFmt)
28973 : Inst_VOP3(iFmt, "v_div_fixup_f32", false)
28974 {
28975 setFlag(ALU);
28976 setFlag(F32);
28977 } // Inst_VOP3__V_DIV_FIXUP_F32
28978
28979 Inst_VOP3__V_DIV_FIXUP_F32::~Inst_VOP3__V_DIV_FIXUP_F32()
28980 {
28981 } // ~Inst_VOP3__V_DIV_FIXUP_F32
28982
28983 // D.f = Divide fixup and flags -- s0.f = Quotient, s1.f = Denominator,
28984 // s2.f = Numerator.
28985 void
28986 Inst_VOP3__V_DIV_FIXUP_F32::execute(GPUDynInstPtr gpuDynInst)
28987 {
28988 Wavefront *wf = gpuDynInst->wavefront();
28989 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
28990 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
28991 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
28992 VecOperandF32 vdst(gpuDynInst, instData.VDST);
28993
28994 src0.readSrc();
28995 src1.readSrc();
28996 src2.readSrc();
28997
28998 if (instData.ABS & 0x1) {
28999 src0.absModifier();
29000 }
29001
29002 if (instData.ABS & 0x2) {
29003 src1.absModifier();
29004 }
29005
29006 if (instData.ABS & 0x4) {
29007 src2.absModifier();
29008 }
29009
29010 if (extData.NEG & 0x1) {
29011 src0.negModifier();
29012 }
29013
29014 if (extData.NEG & 0x2) {
29015 src1.negModifier();
29016 }
29017
29018 if (extData.NEG & 0x4) {
29019 src2.negModifier();
29020 }
29021
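// note: this model simplifies the fixup: NaN inputs and zero or
// infinite denominators are handled explicitly, and the quotient is
// otherwise recomputed as S2 / S1 rather than touched up from S0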
29022 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29023 if (wf->execMask(lane)) {
29024 if (std::fpclassify(src1[lane]) == FP_ZERO) {
29025 if (std::signbit(src1[lane])) {
29026 vdst[lane] = -INFINITY;
29027 } else {
29028 vdst[lane] = +INFINITY;
29029 }
29030 } else if (std::isnan(src2[lane]) || std::isnan(src1[lane])) {
29031 vdst[lane] = NAN;
29032 } else if (std::isinf(src1[lane])) {
29033 if (std::signbit(src1[lane]) != std::signbit(src2[lane])) {
29034 vdst[lane] = -0.0;
29035 } else {
29036 vdst[lane] = +0.0;
29037 }
29038 } else {
29039 vdst[lane] = src2[lane] / src1[lane];
29040 }
29041 }
29042 }
29043
29044 vdst.write();
29045 } // execute
29046 // --- Inst_VOP3__V_DIV_FIXUP_F64 class methods ---
29047
29048 Inst_VOP3__V_DIV_FIXUP_F64::Inst_VOP3__V_DIV_FIXUP_F64(InFmt_VOP3 *iFmt)
29049 : Inst_VOP3(iFmt, "v_div_fixup_f64", false)
29050 {
29051 setFlag(ALU);
29052 setFlag(F64);
29053 } // Inst_VOP3__V_DIV_FIXUP_F64
29054
29055 Inst_VOP3__V_DIV_FIXUP_F64::~Inst_VOP3__V_DIV_FIXUP_F64()
29056 {
29057 } // ~Inst_VOP3__V_DIV_FIXUP_F64
29058
29059 // D.d = Divide fixup and flags -- s0.d = Quotient, s1.d = Denominator,
29060 // s2.d = Numerator.
29061 void
29062 Inst_VOP3__V_DIV_FIXUP_F64::execute(GPUDynInstPtr gpuDynInst)
29063 {
29064 Wavefront *wf = gpuDynInst->wavefront();
29065 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29066 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29067 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
29068 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29069
29070 src0.readSrc();
29071 src1.readSrc();
29072 src2.readSrc();
29073
29074 if (instData.ABS & 0x1) {
29075 src0.absModifier();
29076 }
29077
29078 if (instData.ABS & 0x2) {
29079 src1.absModifier();
29080 }
29081
29082 if (instData.ABS & 0x4) {
29083 src2.absModifier();
29084 }
29085
29086 if (extData.NEG & 0x1) {
29087 src0.negModifier();
29088 }
29089
29090 if (extData.NEG & 0x2) {
29091 src1.negModifier();
29092 }
29093
29094 if (extData.NEG & 0x4) {
29095 src2.negModifier();
29096 }
29097
29098 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29099 if (wf->execMask(lane)) {
29100 int sign_out = std::signbit(src1[lane])
29101 ^ std::signbit(src2[lane]);
29102 int exp1(0);
29103 int exp2(0);
29104 std::frexp(src1[lane], &exp1);
29105 std::frexp(src2[lane], &exp2);
29106
29107 if (std::isnan(src1[lane]) || std::isnan(src2[lane])) {
29108 vdst[lane] = std::numeric_limits<VecElemF64>::quiet_NaN();
29109 } else if (std::fpclassify(src1[lane]) == FP_ZERO
29110 && std::fpclassify(src2[lane]) == FP_ZERO) {
29111 vdst[lane]
29112 = std::numeric_limits<VecElemF64>::signaling_NaN();
29113 } else if (std::isinf(src1[lane]) && std::isinf(src2[lane])) {
29114 vdst[lane]
29115 = std::numeric_limits<VecElemF64>::signaling_NaN();
29116 } else if (std::fpclassify(src1[lane]) == FP_ZERO
29117 || std::isinf(src2[lane])) {
29118 vdst[lane] = sign_out ? -INFINITY : +INFINITY;
29119 } else if (std::isinf(src1[lane])
29120 || std::fpclassify(src2[lane]) == FP_ZERO) {
29121 vdst[lane] = sign_out ? -0.0 : +0.0;
29122 } else if (exp2 - exp1 < -1075) {
29123 vdst[lane] = src0[lane];
29124 } else if (exp1 == 2047) {
29125 vdst[lane] = src0[lane];
29126 } else {
29127 vdst[lane] = sign_out ? -std::fabs(src0[lane])
29128 : std::fabs(src0[lane]);
29129 }
29130 }
29131 }
29132
29133 vdst.write();
29134 }
29135
29136 Inst_VOP3__V_DIV_SCALE_F32::Inst_VOP3__V_DIV_SCALE_F32(
29137 InFmt_VOP3_SDST_ENC *iFmt)
29138 : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f32")
29139 {
29140 setFlag(ALU);
29141 setFlag(WritesVCC);
29142 setFlag(F32);
29143 } // Inst_VOP3__V_DIV_SCALE_F32
29144
29145 Inst_VOP3__V_DIV_SCALE_F32::~Inst_VOP3__V_DIV_SCALE_F32()
29146 {
29147 } // ~Inst_VOP3__V_DIV_SCALE_F32
29148
29149 // {vcc,D.f} = Divide preop and flags -- s0.f = Quotient, s1.f =
29150 // Denominator, s2.f = Numerator -- s0 must equal s1 or s2. Given a
29151 // numerator and denominator, this opcode will appropriately scale inputs
29152 // for division to avoid subnormal terms during Newton-Raphson correction.
29153 // This opcode produces a VCC flag for post-scaling of the quotient.
29154 void
29155 Inst_VOP3__V_DIV_SCALE_F32::execute(GPUDynInstPtr gpuDynInst)
29156 {
29157 Wavefront *wf = gpuDynInst->wavefront();
29158 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
29159 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
29160 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
29161 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29162 VecOperandF32 vdst(gpuDynInst, instData.VDST);
29163
29164 src0.readSrc();
29165 src1.readSrc();
29166 src2.readSrc();
29167
29168 if (extData.NEG & 0x1) {
29169 src0.negModifier();
29170 }
29171
29172 if (extData.NEG & 0x2) {
29173 src1.negModifier();
29174 }
29175
29176 if (extData.NEG & 0x4) {
29177 src2.negModifier();
29178 }
29179
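// note: operand scaling is not modeled for the F32 variant; the
// quotient passes through unmodified and VCC is cleared for all
// active lanes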
29180 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29181 if (wf->execMask(lane)) {
29182 vdst[lane] = src0[lane];
29183 vcc.setBit(lane, 0);
29184 }
29185 }
29186
29187 vcc.write();
29188 vdst.write();
29189 } // execute
29190 // --- Inst_VOP3__V_DIV_SCALE_F64 class methods ---
29191
29192 Inst_VOP3__V_DIV_SCALE_F64::Inst_VOP3__V_DIV_SCALE_F64(
29193 InFmt_VOP3_SDST_ENC *iFmt)
29194 : Inst_VOP3_SDST_ENC(iFmt, "v_div_scale_f64")
29195 {
29196 setFlag(ALU);
29197 setFlag(WritesVCC);
29198 setFlag(F64);
29199 } // Inst_VOP3__V_DIV_SCALE_F64
29200
29201 Inst_VOP3__V_DIV_SCALE_F64::~Inst_VOP3__V_DIV_SCALE_F64()
29202 {
29203 } // ~Inst_VOP3__V_DIV_SCALE_F64
29204
29205 // {vcc,D.d} = Divide preop and flags -- s0.d = Quotient, s1.d =
29206 // Denominator, s2.d = Numerator -- s0 must equal s1 or s2. Given a
29207 // numerator and denominator, this opcode will appropriately scale inputs
29208 // for division to avoid subnormal terms during Newton-Raphson correction.
29209 // This opcode produces a VCC flag for post-scaling of the quotient.
29210 void
29211 Inst_VOP3__V_DIV_SCALE_F64::execute(GPUDynInstPtr gpuDynInst)
29212 {
29213 Wavefront *wf = gpuDynInst->wavefront();
29214 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29215 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29216 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
29217 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29218 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29219
29220 src0.readSrc();
29221 src1.readSrc();
29222 src2.readSrc();
29223
29224 if (extData.NEG & 0x1) {
29225 src0.negModifier();
29226 }
29227
29228 if (extData.NEG & 0x2) {
29229 src1.negModifier();
29230 }
29231
29232 if (extData.NEG & 0x4) {
29233 src2.negModifier();
29234 }
29235
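// the ldexp(..., +/-128) calls apply the 2^128 scaling that keeps the
// Newton-Raphson iteration clear of the subnormal range; a set VCC
// bit tells the subsequent v_div_fmas to post-scale the quotient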
29236 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29237 if (wf->execMask(lane)) {
29238 int exp1(0);
29239 int exp2(0);
29240 std::frexp(src1[lane], &exp1);
29241 std::frexp(src2[lane], &exp2);
29242 vcc.setBit(lane, 0);
29243
29244 if (std::fpclassify(src1[lane]) == FP_ZERO
29245 || std::fpclassify(src2[lane]) == FP_ZERO) {
29246 vdst[lane] = NAN;
29247 } else if (exp2 - exp1 >= 768) {
29248 vcc.setBit(lane, 1);
29249 if (src0[lane] == src1[lane]) {
29250 vdst[lane] = std::ldexp(src0[lane], 128);
29251 }
29252 } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL) {
29253 vdst[lane] = std::ldexp(src0[lane], 128);
29254 } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL
29255 && std::fpclassify(src2[lane] / src1[lane])
29256 == FP_SUBNORMAL) {
29257 vcc.setBit(lane, 1);
29258 if (src0[lane] == src1[lane]) {
29259 vdst[lane] = std::ldexp(src0[lane], 128);
29260 }
29261 } else if (std::fpclassify(1.0 / src1[lane]) == FP_SUBNORMAL) {
29262 vdst[lane] = std::ldexp(src0[lane], -128);
29263 } else if (std::fpclassify(src2[lane] / src1[lane])
29264 == FP_SUBNORMAL) {
29265 vcc.setBit(lane, 1);
29266 if (src0[lane] == src2[lane]) {
29267 vdst[lane] = std::ldexp(src0[lane], 128);
29268 }
29269 } else if (exp2 <= 53) {
29270 vdst[lane] = std::ldexp(src0[lane], 128);
29271 }
29272 }
29273 }
29274
29275 vcc.write();
29276 vdst.write();
29277 }
29278
29279 Inst_VOP3__V_DIV_FMAS_F32::Inst_VOP3__V_DIV_FMAS_F32(InFmt_VOP3 *iFmt)
29280 : Inst_VOP3(iFmt, "v_div_fmas_f32", false)
29281 {
29282 setFlag(ALU);
29283 setFlag(ReadsVCC);
29284 setFlag(F32);
29285 setFlag(FMA);
29286 } // Inst_VOP3__V_DIV_FMAS_F32
29287
29288 Inst_VOP3__V_DIV_FMAS_F32::~Inst_VOP3__V_DIV_FMAS_F32()
29289 {
29290 } // ~Inst_VOP3__V_DIV_FMAS_F32
29291
29292 // D.f = Special case divide FMA with scale and flags(s0.f = Quotient,
29293 // s1.f = Denominator, s2.f = Numerator)
29294 void
29295 Inst_VOP3__V_DIV_FMAS_F32::execute(GPUDynInstPtr gpuDynInst)
29296 {
29297 Wavefront *wf = gpuDynInst->wavefront();
29298 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
29299 ConstVecOperandF32 src1(gpuDynInst, extData.SRC1);
29300 ConstVecOperandF32 src2(gpuDynInst, extData.SRC2);
29301 VecOperandF32 vdst(gpuDynInst, instData.VDST);
29302
29303 src0.readSrc();
29304 src1.readSrc();
29305 src2.readSrc();
29306
29307 if (instData.ABS & 0x1) {
29308 src0.absModifier();
29309 }
29310
29311 if (instData.ABS & 0x2) {
29312 src1.absModifier();
29313 }
29314
29315 if (instData.ABS & 0x4) {
29316 src2.absModifier();
29317 }
29318
29319 if (extData.NEG & 0x1) {
29320 src0.negModifier();
29321 }
29322
29323 if (extData.NEG & 0x2) {
29324 src1.negModifier();
29325 }
29326
29327 if (extData.NEG & 0x4) {
29328 src2.negModifier();
29329 }
29330
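// note: unlike the F64 variant below, this model does not read VCC,
// so the VCC-controlled post-scale of the result is not applied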
29331 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29332 if (wf->execMask(lane)) {
29333 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
29334 }
29335 }
29336
29337 vdst.write();
29338 } // execute
29339 // --- Inst_VOP3__V_DIV_FMAS_F64 class methods ---
29340
29341 Inst_VOP3__V_DIV_FMAS_F64::Inst_VOP3__V_DIV_FMAS_F64(InFmt_VOP3 *iFmt)
29342 : Inst_VOP3(iFmt, "v_div_fmas_f64", false)
29343 {
29344 setFlag(ALU);
29345 setFlag(ReadsVCC);
29346 setFlag(F64);
29347 setFlag(FMA);
29348 } // Inst_VOP3__V_DIV_FMAS_F64
29349
29350 Inst_VOP3__V_DIV_FMAS_F64::~Inst_VOP3__V_DIV_FMAS_F64()
29351 {
29352 } // ~Inst_VOP3__V_DIV_FMAS_F64
29353
29354 // D.d = Special case divide FMA with scale and flags(s0.d = Quotient,
29355 // s1.d = Denominator, s2.d = Numerator)
29356 void
29357 Inst_VOP3__V_DIV_FMAS_F64::execute(GPUDynInstPtr gpuDynInst)
29358 {
29359 Wavefront *wf = gpuDynInst->wavefront();
29360 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29361 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29362 ConstVecOperandF64 src2(gpuDynInst, extData.SRC2);
29363 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29364 ConstScalarOperandU64 vcc(gpuDynInst, REG_VCC_LO);
29365
29366 src0.readSrc();
29367 src1.readSrc();
29368 src2.readSrc();
29369 vcc.read();
29370
29371 if (instData.ABS & 0x1) {
29372 src0.absModifier();
29373 }
29374
29375 if (instData.ABS & 0x2) {
29376 src1.absModifier();
29377 }
29378
29379 if (instData.ABS & 0x4) {
29380 src2.absModifier();
29381 }
29382
29383 if (extData.NEG & 0x1) {
29384 src0.negModifier();
29385 }
29386
29387 if (extData.NEG & 0x2) {
29388 src1.negModifier();
29389 }
29390
29391 if (extData.NEG & 0x4) {
29392 src2.negModifier();
29393 }
29394
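// lanes whose VCC bit is set (typically by a preceding v_div_scale)
// have their FMA result post-scaled by 2^64 to undo the earlier
// operand scaling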
29395 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29396 if (wf->execMask(lane)) {
29397 if (bits(vcc.rawData(), lane)) {
29398 vdst[lane] = std::ldexp(std::fma(src0[lane],
29399 src1[lane], src2[lane]), 64);
29400 } else {
29401 vdst[lane] = std::fma(src0[lane], src1[lane], src2[lane]);
29402 }
29403 }
29404 }
29405
29406 vdst.write();
29407 }
29408
29409 Inst_VOP3__V_MSAD_U8::Inst_VOP3__V_MSAD_U8(InFmt_VOP3 *iFmt)
29410 : Inst_VOP3(iFmt, "v_msad_u8", false)
29411 {
29412 setFlag(ALU);
29413 } // Inst_VOP3__V_MSAD_U8
29414
29415 Inst_VOP3__V_MSAD_U8::~Inst_VOP3__V_MSAD_U8()
29416 {
29417 } // ~Inst_VOP3__V_MSAD_U8
29418
29419 // D.u = Masked Byte SAD with accum_lo(S0.u, S1.u, S2.u).
29420 void
29421 Inst_VOP3__V_MSAD_U8::execute(GPUDynInstPtr gpuDynInst)
29422 {
29423 panicUnimplemented();
29424 }
29425
29426 Inst_VOP3__V_QSAD_PK_U16_U8::Inst_VOP3__V_QSAD_PK_U16_U8(InFmt_VOP3 *iFmt)
29427 : Inst_VOP3(iFmt, "v_qsad_pk_u16_u8", false)
29428 {
29429 setFlag(ALU);
29430 } // Inst_VOP3__V_QSAD_PK_U16_U8
29431
29432 Inst_VOP3__V_QSAD_PK_U16_U8::~Inst_VOP3__V_QSAD_PK_U16_U8()
29433 {
29434 } // ~Inst_VOP3__V_QSAD_PK_U16_U8
29435
29436 // D.u = Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
29437 // S1.u[31:0], S2.u[63:0])
29438 void
29439 Inst_VOP3__V_QSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
29440 {
29441 panicUnimplemented();
29442 }
29443
29444 Inst_VOP3__V_MQSAD_PK_U16_U8::Inst_VOP3__V_MQSAD_PK_U16_U8(
29445 InFmt_VOP3 *iFmt)
29446 : Inst_VOP3(iFmt, "v_mqsad_pk_u16_u8", false)
29447 {
29448 setFlag(ALU);
29449 } // Inst_VOP3__V_MQSAD_PK_U16_U8
29450
29451 Inst_VOP3__V_MQSAD_PK_U16_U8::~Inst_VOP3__V_MQSAD_PK_U16_U8()
29452 {
29453 } // ~Inst_VOP3__V_MQSAD_PK_U16_U8
29454
29455 // D.u = Masked Quad-Byte SAD with 16-bit packed accum_lo/hi(S0.u[63:0],
29456 // S1.u[31:0], S2.u[63:0])
29457 void
29458 Inst_VOP3__V_MQSAD_PK_U16_U8::execute(GPUDynInstPtr gpuDynInst)
29459 {
29460 panicUnimplemented();
29461 }
29462
29463 Inst_VOP3__V_MQSAD_U32_U8::Inst_VOP3__V_MQSAD_U32_U8(InFmt_VOP3 *iFmt)
29464 : Inst_VOP3(iFmt, "v_mqsad_u32_u8", false)
29465 {
29466 setFlag(ALU);
29467 } // Inst_VOP3__V_MQSAD_U32_U8
29468
29469 Inst_VOP3__V_MQSAD_U32_U8::~Inst_VOP3__V_MQSAD_U32_U8()
29470 {
29471 } // ~Inst_VOP3__V_MQSAD_U32_U8
29472
29473 // D.u128 = Masked Quad-Byte SAD with 32-bit accum_lo/hi(S0.u[63:0],
29474 // S1.u[31:0], S2.u[127:0])
29475 void
29476 Inst_VOP3__V_MQSAD_U32_U8::execute(GPUDynInstPtr gpuDynInst)
29477 {
29478 panicUnimplemented();
29479 }
29480
29481 Inst_VOP3__V_MAD_U64_U32::Inst_VOP3__V_MAD_U64_U32(
29482 InFmt_VOP3_SDST_ENC *iFmt)
29483 : Inst_VOP3_SDST_ENC(iFmt, "v_mad_u64_u32")
29484 {
29485 setFlag(ALU);
29486 setFlag(WritesVCC);
29487 setFlag(MAD);
29488 } // Inst_VOP3__V_MAD_U64_U32
29489
29490 Inst_VOP3__V_MAD_U64_U32::~Inst_VOP3__V_MAD_U64_U32()
29491 {
29492 } // ~Inst_VOP3__V_MAD_U64_U32
29493
29494 // {vcc_out, D.u64} = S0.u32 * S1.u32 + S2.u64.
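// muladd() is a helper (defined in inst_util.hh) that stores the
// full-width product-plus-addend in its first argument and returns the
// carry-out, which is recorded per lane in VCC.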
29495 void
29496 Inst_VOP3__V_MAD_U64_U32::execute(GPUDynInstPtr gpuDynInst)
29497 {
29498 Wavefront *wf = gpuDynInst->wavefront();
29499 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
29500 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
29501 ConstVecOperandU64 src2(gpuDynInst, extData.SRC2);
29502 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29503 VecOperandU64 vdst(gpuDynInst, instData.VDST);
29504
29505 src0.readSrc();
29506 src1.readSrc();
29507 src2.readSrc();
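// vdst is read first so that lanes masked off by EXEC retain their
// previous value when the destination is written back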
29508 vdst.read();
29509
29510 /**
29511 * input modifiers are supported by FP operations only
29512 */
29513 assert(!(extData.NEG & 0x1));
29514 assert(!(extData.NEG & 0x2));
29515 assert(!(extData.NEG & 0x4));
29516
29517 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29518 if (wf->execMask(lane)) {
29519 vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
29520 src2[lane]));
29521 }
29522 }
29523
29524 vcc.write();
29525 vdst.write();
29526 }
29527
29528 Inst_VOP3__V_MAD_I64_I32::Inst_VOP3__V_MAD_I64_I32(
29529 InFmt_VOP3_SDST_ENC *iFmt)
29530 : Inst_VOP3_SDST_ENC(iFmt, "v_mad_i64_i32")
29531 {
29532 setFlag(ALU);
29533 setFlag(WritesVCC);
29534 setFlag(MAD);
29535 } // Inst_VOP3__V_MAD_I64_I32
29536
29537 Inst_VOP3__V_MAD_I64_I32::~Inst_VOP3__V_MAD_I64_I32()
29538 {
29539 } // ~Inst_VOP3__V_MAD_I64_I32
29540
29541 // {vcc_out,D.i64} = S0.i32 * S1.i32 + S2.i64.
29542 void
29543 Inst_VOP3__V_MAD_I64_I32::execute(GPUDynInstPtr gpuDynInst)
29544 {
29545 Wavefront *wf = gpuDynInst->wavefront();
29546 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
29547 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
29548 ConstVecOperandI64 src2(gpuDynInst, extData.SRC2);
29549 ScalarOperandU64 vcc(gpuDynInst, instData.SDST);
29550 VecOperandI64 vdst(gpuDynInst, instData.VDST);
29551
29552 src0.readSrc();
29553 src1.readSrc();
src2.readSrc();
vdst.read();
29555
29556 /**
29557 * input modifiers are supported by FP operations only
29558 */
29559 assert(!(extData.NEG & 0x1));
29560 assert(!(extData.NEG & 0x2));
29561 assert(!(extData.NEG & 0x4));
29562
29563 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29564 if (wf->execMask(lane)) {
29565 vcc.setBit(lane, muladd(vdst[lane], src0[lane], src1[lane],
29566 src2[lane]));
29567 }
29568 }
29569
29570 vcc.write();
29571 vdst.write();
29572 }
29573
29574 Inst_VOP3__V_MAD_F16::Inst_VOP3__V_MAD_F16(InFmt_VOP3 *iFmt)
29575 : Inst_VOP3(iFmt, "v_mad_f16", false)
29576 {
29577 setFlag(ALU);
29578 setFlag(F16);
29579 setFlag(MAD);
29580 } // Inst_VOP3__V_MAD_F16
29581
29582 Inst_VOP3__V_MAD_F16::~Inst_VOP3__V_MAD_F16()
29583 {
29584 } // ~Inst_VOP3__V_MAD_F16
29585
29586 // D.f16 = S0.f16 * S1.f16 + S2.f16.
29587 // Supports round mode, exception flags, saturation.
29588 void
29589 Inst_VOP3__V_MAD_F16::execute(GPUDynInstPtr gpuDynInst)
29590 {
29591 panicUnimplemented();
29592 }
29593
29594 Inst_VOP3__V_MAD_U16::Inst_VOP3__V_MAD_U16(InFmt_VOP3 *iFmt)
29595 : Inst_VOP3(iFmt, "v_mad_u16", false)
29596 {
29597 setFlag(ALU);
29598 setFlag(MAD);
29599 } // Inst_VOP3__V_MAD_U16
29600
29601 Inst_VOP3__V_MAD_U16::~Inst_VOP3__V_MAD_U16()
29602 {
29603 } // ~Inst_VOP3__V_MAD_U16
29604
29605 // D.u16 = S0.u16 * S1.u16 + S2.u16.
29606 // Supports saturation (unsigned 16-bit integer domain).
29607 void
29608 Inst_VOP3__V_MAD_U16::execute(GPUDynInstPtr gpuDynInst)
29609 {
29610 Wavefront *wf = gpuDynInst->wavefront();
29611 ConstVecOperandU16 src0(gpuDynInst, extData.SRC0);
29612 ConstVecOperandU16 src1(gpuDynInst, extData.SRC1);
29613 ConstVecOperandU16 src2(gpuDynInst, extData.SRC2);
29614 VecOperandU16 vdst(gpuDynInst, instData.VDST);
29615
29616 src0.readSrc();
29617 src1.readSrc();
29618 src2.readSrc();
29619
29620 /**
29621 * input modifiers are supported by FP operations only
29622 */
29623 assert(!(instData.ABS & 0x1));
29624 assert(!(instData.ABS & 0x2));
29625 assert(!(instData.ABS & 0x4));
29626 assert(!(extData.NEG & 0x1));
29627 assert(!(extData.NEG & 0x2));
29628 assert(!(extData.NEG & 0x4));
29629
29630 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29631 if (wf->execMask(lane)) {
29632 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
29633 }
29634 }
29635
29636 vdst.write();
29637 }
29638
29639 Inst_VOP3__V_MAD_I16::Inst_VOP3__V_MAD_I16(InFmt_VOP3 *iFmt)
29640 : Inst_VOP3(iFmt, "v_mad_i16", false)
29641 {
29642 setFlag(ALU);
29643 setFlag(MAD);
29644 } // Inst_VOP3__V_MAD_I16
29645
29646 Inst_VOP3__V_MAD_I16::~Inst_VOP3__V_MAD_I16()
29647 {
29648 } // ~Inst_VOP3__V_MAD_I16
29649
29650 // D.i16 = S0.i16 * S1.i16 + S2.i16.
29651 // Supports saturation (signed 16-bit integer domain).
29652 void
29653 Inst_VOP3__V_MAD_I16::execute(GPUDynInstPtr gpuDynInst)
29654 {
29655 Wavefront *wf = gpuDynInst->wavefront();
29656 ConstVecOperandI16 src0(gpuDynInst, extData.SRC0);
29657 ConstVecOperandI16 src1(gpuDynInst, extData.SRC1);
29658 ConstVecOperandI16 src2(gpuDynInst, extData.SRC2);
29659 VecOperandI16 vdst(gpuDynInst, instData.VDST);
29660
29661 src0.readSrc();
29662 src1.readSrc();
29663 src2.readSrc();
29664
29665 /**
29666 * input modifiers are supported by FP operations only
29667 */
29668 assert(!(instData.ABS & 0x1));
29669 assert(!(instData.ABS & 0x2));
29670 assert(!(instData.ABS & 0x4));
29671 assert(!(extData.NEG & 0x1));
29672 assert(!(extData.NEG & 0x2));
29673 assert(!(extData.NEG & 0x4));
29674
29675 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29676 if (wf->execMask(lane)) {
29677 vdst[lane] = src0[lane] * src1[lane] + src2[lane];
29678 }
29679 }
29680
29681 vdst.write();
29682 }
29683
29684 Inst_VOP3__V_PERM_B32::Inst_VOP3__V_PERM_B32(InFmt_VOP3 *iFmt)
29685 : Inst_VOP3(iFmt, "v_perm_b32", false)
29686 {
29687 setFlag(ALU);
29688 } // Inst_VOP3__V_PERM_B32
29689
29690 Inst_VOP3__V_PERM_B32::~Inst_VOP3__V_PERM_B32()
29691 {
29692 } // ~Inst_VOP3__V_PERM_B32
29693
29694 // D.u[31:24] = permute({S0.u, S1.u}, S2.u[31:24]);
29695 // D.u[23:16] = permute({S0.u, S1.u}, S2.u[23:16]);
29696 // D.u[15:8] = permute({S0.u, S1.u}, S2.u[15:8]);
29697 // D.u[7:0] = permute({S0.u, S1.u}, S2.u[7:0]);
29698 // byte permute(byte in[8], byte sel) {
29699 // if(sel>=13) then return 0xff;
29700 // elsif(sel==12) then return 0x00;
29701 // elsif(sel==11) then return in[7][7] * 0xff;
29702 // elsif(sel==10) then return in[5][7] * 0xff;
29703 // elsif(sel==9) then return in[3][7] * 0xff;
29704 // elsif(sel==8) then return in[1][7] * 0xff;
29705 // else return in[sel];
29706 // }
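// in[] indexes the byte-wise concatenation {S0, S1}, with in[0] the
// least significant byte of S1. Example (illustrative values): with
// S2 = 0x07060504 every selector picks one of bytes 7..4, i.e. the
// bytes of S0, so D equals S0.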
29707 void
29708 Inst_VOP3__V_PERM_B32::execute(GPUDynInstPtr gpuDynInst)
29709 {
29710 Wavefront *wf = gpuDynInst->wavefront();
29711 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
29712 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
29713 ConstVecOperandU32 src2(gpuDynInst, extData.SRC2);
29714 VecOperandU32 vdst(gpuDynInst, instData.VDST);
29715
29716 src0.readSrc();
29717 src1.readSrc();
29718 src2.readSrc();
29719
29720 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29721 if (wf->execMask(lane)) {
29722 VecElemU64 selector = (VecElemU64)src0[lane];
29723 selector = (selector << 32) | (VecElemU64)src1[lane];
29724 vdst[lane] = 0;
29725
29726 DPRINTF(GCN3, "Executing v_perm_b32 src_0 0x%08x, src_1 "
29727 "0x%08x, src_2 0x%08x, vdst 0x%08x\n", src0[lane],
29728 src1[lane], src2[lane], vdst[lane]);
29729 DPRINTF(GCN3, "Selector: 0x%08x \n", selector);
29730
29731 for (int i = 0; i < 4 ; ++i) {
29732 VecElemU32 permuted_val = permute(selector, 0xFF
29733 & ((VecElemU32)src2[lane] >> (8 * i)));
29734 vdst[lane] |= (permuted_val << (8 * i));
29735 }
29736
29737 DPRINTF(GCN3, "v_perm result: 0x%08x\n", vdst[lane]);
29738 }
29739 }
29740
29741 vdst.write();
29742 }
29743
29744 Inst_VOP3__V_FMA_F16::Inst_VOP3__V_FMA_F16(InFmt_VOP3 *iFmt)
29745 : Inst_VOP3(iFmt, "v_fma_f16", false)
29746 {
29747 setFlag(ALU);
29748 setFlag(F16);
29749 setFlag(FMA);
29750 } // Inst_VOP3__V_FMA_F16
29751
29752 Inst_VOP3__V_FMA_F16::~Inst_VOP3__V_FMA_F16()
29753 {
29754 } // ~Inst_VOP3__V_FMA_F16
29755
29756 // D.f16 = S0.f16 * S1.f16 + S2.f16.
29757 // Fused half precision multiply add.
29758 void
29759 Inst_VOP3__V_FMA_F16::execute(GPUDynInstPtr gpuDynInst)
29760 {
29761 panicUnimplemented();
29762 }
29763
29764 Inst_VOP3__V_DIV_FIXUP_F16::Inst_VOP3__V_DIV_FIXUP_F16(InFmt_VOP3 *iFmt)
29765 : Inst_VOP3(iFmt, "v_div_fixup_f16", false)
29766 {
29767 setFlag(ALU);
29768 setFlag(F16);
29769 } // Inst_VOP3__V_DIV_FIXUP_F16
29770
29771 Inst_VOP3__V_DIV_FIXUP_F16::~Inst_VOP3__V_DIV_FIXUP_F16()
29772 {
29773 } // ~Inst_VOP3__V_DIV_FIXUP_F16
29774
29775 // sign_out = sign(S1.f16)^sign(S2.f16);
29776 // if (S2.f16 == NAN)
29777 // D.f16 = Quiet(S2.f16);
29778 // else if (S1.f16 == NAN)
29779 // D.f16 = Quiet(S1.f16);
29780 // else if (S1.f16 == S2.f16 == 0)
29781 // # 0/0
29782 // D.f16 = pele_nan(0xfe00);
29783 // else if (abs(S1.f16) == abs(S2.f16) == +-INF)
29784 // # inf/inf
29785 // D.f16 = pele_nan(0xfe00);
29786 // else if (S1.f16 == 0 || abs(S2.f16) == +-INF)
29787 // # x/0, or inf/y
29788 // D.f16 = sign_out ? -INF : INF;
29789 // else if (abs(S1.f16) == +-INF || S2.f16 == 0)
29790 // # x/inf, 0/y
29791 // D.f16 = sign_out ? -0 : 0;
29792 // else if ((exp(S2.f16) - exp(S1.f16)) < -150)
29793 // D.f16 = sign_out ? -underflow : underflow;
29794 // else if (exp(S1.f16) == 255)
29795 // D.f16 = sign_out ? -overflow : overflow;
29796 // else
29797 // D.f16 = sign_out ? -abs(S0.f16) : abs(S0.f16).
29798 // Half precision division fixup.
29799 // S0 = Quotient, S1 = Denominator, S2 = Numerator.
29800 // Given a numerator, denominator, and quotient from a divide, this opcode
29801 // will detect and apply special case numerics, touching up the quotient if
29802 // necessary. This opcode also generates invalid, denorm and divide by
29803 // zero exceptions caused by the division.
29804 void
29805 Inst_VOP3__V_DIV_FIXUP_F16::execute(GPUDynInstPtr gpuDynInst)
29806 {
29807 panicUnimplemented();
29808 }
29809
29810 Inst_VOP3__V_CVT_PKACCUM_U8_F32::Inst_VOP3__V_CVT_PKACCUM_U8_F32(
29811 InFmt_VOP3 *iFmt)
29812 : Inst_VOP3(iFmt, "v_cvt_pkaccum_u8_f32", false)
29813 {
29814 setFlag(ALU);
29815 setFlag(F32);
29816 } // Inst_VOP3__V_CVT_PKACCUM_U8_F32
29817
29818 Inst_VOP3__V_CVT_PKACCUM_U8_F32::~Inst_VOP3__V_CVT_PKACCUM_U8_F32()
29819 {
29820 } // ~Inst_VOP3__V_CVT_PKACCUM_U8_F32
29821
29822 // byte = S1.u[1:0]; bit = byte * 8;
29823 // D.u[bit + 7:bit] = flt32_to_uint8(S0.f);
29824 // Pack converted value of S0.f into byte S1 of the destination.
29825 // SQ translates to V_CVT_PK_U8_F32.
29826 // Note: this opcode uses src_c to pass destination in as a source.
29827 void
29828 Inst_VOP3__V_CVT_PKACCUM_U8_F32::execute(GPUDynInstPtr gpuDynInst)
29829 {
29830 panicUnimplemented();
29831 }
29832
29833 Inst_VOP3__V_INTERP_P1_F32::Inst_VOP3__V_INTERP_P1_F32(InFmt_VOP3 *iFmt)
29834 : Inst_VOP3(iFmt, "v_interp_p1_f32", false)
29835 {
29836 setFlag(ALU);
29837 setFlag(F32);
29838 } // Inst_VOP3__V_INTERP_P1_F32
29839
29840 Inst_VOP3__V_INTERP_P1_F32::~Inst_VOP3__V_INTERP_P1_F32()
29841 {
29842 } // ~Inst_VOP3__V_INTERP_P1_F32
29843
29844 // D.f = P10 * S.f + P0;
29845 void
29846 Inst_VOP3__V_INTERP_P1_F32::execute(GPUDynInstPtr gpuDynInst)
29847 {
29848 panicUnimplemented();
29849 }
29850
29851 Inst_VOP3__V_INTERP_P2_F32::Inst_VOP3__V_INTERP_P2_F32(InFmt_VOP3 *iFmt)
29852 : Inst_VOP3(iFmt, "v_interp_p2_f32", false)
29853 {
29854 setFlag(ALU);
29855 setFlag(F32);
29856 } // Inst_VOP3__V_INTERP_P2_F32
29857
29858 Inst_VOP3__V_INTERP_P2_F32::~Inst_VOP3__V_INTERP_P2_F32()
29859 {
29860 } // ~Inst_VOP3__V_INTERP_P2_F32
29861
29862 // D.f = P20 * S.f + D.f;
29863 void
29864 Inst_VOP3__V_INTERP_P2_F32::execute(GPUDynInstPtr gpuDynInst)
29865 {
29866 panicUnimplemented();
29867 }
29868
29869 Inst_VOP3__V_INTERP_MOV_F32::Inst_VOP3__V_INTERP_MOV_F32(InFmt_VOP3 *iFmt)
29870 : Inst_VOP3(iFmt, "v_interp_mov_f32", false)
29871 {
29872 setFlag(ALU);
29873 setFlag(F32);
29874 } // Inst_VOP3__V_INTERP_MOV_F32
29875
29876 Inst_VOP3__V_INTERP_MOV_F32::~Inst_VOP3__V_INTERP_MOV_F32()
29877 {
29878 } // ~Inst_VOP3__V_INTERP_MOV_F32
29879
29880 // D.f = {P10,P20,P0}[S.u]; parameter load.
29881 void
29882 Inst_VOP3__V_INTERP_MOV_F32::execute(GPUDynInstPtr gpuDynInst)
29883 {
29884 panicUnimplemented();
29885 }
29886
29887 Inst_VOP3__V_INTERP_P1LL_F16::Inst_VOP3__V_INTERP_P1LL_F16(
29888 InFmt_VOP3 *iFmt)
29889 : Inst_VOP3(iFmt, "v_interp_p1ll_f16", false)
29890 {
29891 setFlag(ALU);
29892 setFlag(F16);
29893 } // Inst_VOP3__V_INTERP_P1LL_F16
29894
29895 Inst_VOP3__V_INTERP_P1LL_F16::~Inst_VOP3__V_INTERP_P1LL_F16()
29896 {
29897 } // ~Inst_VOP3__V_INTERP_P1LL_F16
29898
29899 // D.f32 = P10.f16 * S0.f32 + P0.f16.
29900 void
29901 Inst_VOP3__V_INTERP_P1LL_F16::execute(GPUDynInstPtr gpuDynInst)
29902 {
29903 panicUnimplemented();
29904 }
29905
29906 Inst_VOP3__V_INTERP_P1LV_F16::Inst_VOP3__V_INTERP_P1LV_F16(
29907 InFmt_VOP3 *iFmt)
29908 : Inst_VOP3(iFmt, "v_interp_p1lv_f16", false)
29909 {
29910 setFlag(ALU);
29911 setFlag(F16);
29912 } // Inst_VOP3__V_INTERP_P1LV_F16
29913
29914 Inst_VOP3__V_INTERP_P1LV_F16::~Inst_VOP3__V_INTERP_P1LV_F16()
29915 {
29916 } // ~Inst_VOP3__V_INTERP_P1LV_F16
29917
29918 void
29919 Inst_VOP3__V_INTERP_P1LV_F16::execute(GPUDynInstPtr gpuDynInst)
29920 {
29921 panicUnimplemented();
29922 }
29923
29924 Inst_VOP3__V_INTERP_P2_F16::Inst_VOP3__V_INTERP_P2_F16(InFmt_VOP3 *iFmt)
29925 : Inst_VOP3(iFmt, "v_interp_p2_f16", false)
29926 {
29927 setFlag(ALU);
29928 setFlag(F16);
29929 } // Inst_VOP3__V_INTERP_P2_F16
29930
29931 Inst_VOP3__V_INTERP_P2_F16::~Inst_VOP3__V_INTERP_P2_F16()
29932 {
29933 } // ~Inst_VOP3__V_INTERP_P2_F16
29934
29935 // D.f16 = P20.f16 * S0.f32 + S2.f32.
29936 void
29937 Inst_VOP3__V_INTERP_P2_F16::execute(GPUDynInstPtr gpuDynInst)
29938 {
29939 panicUnimplemented();
29940 }
29941
29942 Inst_VOP3__V_ADD_F64::Inst_VOP3__V_ADD_F64(InFmt_VOP3 *iFmt)
29943 : Inst_VOP3(iFmt, "v_add_f64", false)
29944 {
29945 setFlag(ALU);
29946 setFlag(F64);
29947 } // Inst_VOP3__V_ADD_F64
29948
29949 Inst_VOP3__V_ADD_F64::~Inst_VOP3__V_ADD_F64()
29950 {
29951 } // ~Inst_VOP3__V_ADD_F64
29952
29953 // D.d = S0.d + S1.d.
29954 void
29955 Inst_VOP3__V_ADD_F64::execute(GPUDynInstPtr gpuDynInst)
29956 {
29957 Wavefront *wf = gpuDynInst->wavefront();
29958 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
29959 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
29960 VecOperandF64 vdst(gpuDynInst, instData.VDST);
29961
29962 src0.readSrc();
29963 src1.readSrc();
29964
29965 if (instData.ABS & 0x1) {
29966 src0.absModifier();
29967 }
29968
29969 if (instData.ABS & 0x2) {
29970 src1.absModifier();
29971 }
29972
29973 if (extData.NEG & 0x1) {
29974 src0.negModifier();
29975 }
29976
29977 if (extData.NEG & 0x2) {
29978 src1.negModifier();
29979 }
29980
29981 /**
29982 * input modifiers are supported by FP operations only
29983 */
29984 assert(!(instData.ABS & 0x4));
29985 assert(!(extData.NEG & 0x4));
29986
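// special cases follow IEEE 754: NaNs propagate, opposite-signed
// infinities give NaN, and zero/subnormal inputs are flushed to zero,
// producing -0.0 only when both inputs carry the sign bit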
29987 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
29988 if (wf->execMask(lane)) {
29989 if (std::isnan(src0[lane]) ||
29990 std::isnan(src1[lane]) ) {
29991 vdst[lane] = NAN;
29992 } else if (std::isinf(src0[lane]) &&
29993 std::isinf(src1[lane])) {
29994 if (std::signbit(src0[lane]) !=
29995 std::signbit(src1[lane])) {
29996 vdst[lane] = NAN;
29997 } else {
29998 vdst[lane] = src0[lane];
29999 }
30000 } else if (std::isinf(src0[lane])) {
30001 vdst[lane] = src0[lane];
30002 } else if (std::isinf(src1[lane])) {
30003 vdst[lane] = src1[lane];
30004 } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30005 std::fpclassify(src0[lane]) == FP_ZERO) {
30006 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30007 std::fpclassify(src1[lane]) == FP_ZERO) {
30008 if (std::signbit(src0[lane]) &&
30009 std::signbit(src1[lane])) {
30010 vdst[lane] = -0.0;
30011 } else {
30012 vdst[lane] = 0.0;
30013 }
30014 } else {
30015 vdst[lane] = src1[lane];
30016 }
30017 } else if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30018 std::fpclassify(src1[lane]) == FP_ZERO) {
30019 if (std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30020 std::fpclassify(src0[lane]) == FP_ZERO) {
30021 if (std::signbit(src0[lane]) &&
30022 std::signbit(src1[lane])) {
30023 vdst[lane] = -0.0;
30024 } else {
30025 vdst[lane] = 0.0;
30026 }
30027 } else {
30028 vdst[lane] = src0[lane];
30029 }
30030 } else {
30031 vdst[lane] = src0[lane] + src1[lane];
30032 }
30033 }
30034 }
30035
30036 vdst.write();
30037 }
30038
30039 Inst_VOP3__V_MUL_F64::Inst_VOP3__V_MUL_F64(InFmt_VOP3 *iFmt)
30040 : Inst_VOP3(iFmt, "v_mul_f64", false)
30041 {
30042 setFlag(ALU);
30043 setFlag(F64);
30044 } // Inst_VOP3__V_MUL_F64
30045
30046 Inst_VOP3__V_MUL_F64::~Inst_VOP3__V_MUL_F64()
30047 {
30048 } // ~Inst_VOP3__V_MUL_F64
30049
30050 // D.d = S0.d * S1.d.
30051 void
30052 Inst_VOP3__V_MUL_F64::execute(GPUDynInstPtr gpuDynInst)
30053 {
30054 Wavefront *wf = gpuDynInst->wavefront();
30055 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
30056 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
30057 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30058
30059 src0.readSrc();
30060 src1.readSrc();
30061
30062 if (instData.ABS & 0x1) {
30063 src0.absModifier();
30064 }
30065
30066 if (instData.ABS & 0x2) {
30067 src1.absModifier();
30068 }
30069
30070 if (extData.NEG & 0x1) {
30071 src0.negModifier();
30072 }
30073
30074 if (extData.NEG & 0x2) {
30075 src1.negModifier();
30076 }
30077
30078 /**
30079 * input modifiers are supported by FP operations only
30080 */
30081 assert(!(instData.ABS & 0x4));
30082 assert(!(extData.NEG & 0x4));
30083
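// special cases: NaNs propagate, 0 * inf gives NaN, and zero or
// subnormal operands are flushed to a signed zero whose sign is the
// XOR of the two input signs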
30084 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30085 if (wf->execMask(lane)) {
30086 if (std::isnan(src0[lane]) ||
30087 std::isnan(src1[lane])) {
30088 vdst[lane] = NAN;
30089 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30090 std::fpclassify(src0[lane]) == FP_ZERO) &&
30091 !std::signbit(src0[lane])) {
30092 if (std::isinf(src1[lane])) {
30093 vdst[lane] = NAN;
30094 } else if (!std::signbit(src1[lane])) {
30095 vdst[lane] = +0.0;
30096 } else {
30097 vdst[lane] = -0.0;
30098 }
30099 } else if ((std::fpclassify(src0[lane]) == FP_SUBNORMAL ||
30100 std::fpclassify(src0[lane]) == FP_ZERO) &&
30101 std::signbit(src0[lane])) {
30102 if (std::isinf(src1[lane])) {
30103 vdst[lane] = NAN;
30104 } else if (std::signbit(src1[lane])) {
30105 vdst[lane] = +0.0;
30106 } else {
30107 vdst[lane] = -0.0;
30108 }
30109 } else if (std::isinf(src0[lane]) &&
30110 !std::signbit(src0[lane])) {
30111 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30112 std::fpclassify(src1[lane]) == FP_ZERO) {
30113 vdst[lane] = NAN;
30114 } else if (!std::signbit(src1[lane])) {
30115 vdst[lane] = +INFINITY;
30116 } else {
30117 vdst[lane] = -INFINITY;
30118 }
30119 } else if (std::isinf(src0[lane]) &&
30120 std::signbit(src0[lane])) {
30121 if (std::fpclassify(src1[lane]) == FP_SUBNORMAL ||
30122 std::fpclassify(src1[lane]) == FP_ZERO) {
30123 vdst[lane] = NAN;
30124 } else if (std::signbit(src1[lane])) {
30125 vdst[lane] = +INFINITY;
30126 } else {
30127 vdst[lane] = -INFINITY;
30128 }
30129 } else {
30130 vdst[lane] = src0[lane] * src1[lane];
30131 }
30132 }
30133 }
30134
30135 vdst.write();
30136 }
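
// Worked examples of the cases above: 0.0 * INF = NAN; (-0.0) * 3.0 =
// -0.0; (-INF) * (-2.0) = +INF. The sign of a zero or infinity result
// follows the XOR of the operand signs, and subnormal inputs are
// treated as (signed) zeros.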
30137
30138 Inst_VOP3__V_MIN_F64::Inst_VOP3__V_MIN_F64(InFmt_VOP3 *iFmt)
30139 : Inst_VOP3(iFmt, "v_min_f64", false)
30140 {
30141 setFlag(ALU);
30142 setFlag(F64);
30143 } // Inst_VOP3__V_MIN_F64
30144
30145 Inst_VOP3__V_MIN_F64::~Inst_VOP3__V_MIN_F64()
30146 {
30147 } // ~Inst_VOP3__V_MIN_F64
30148
30149 // D.d = min(S0.d, S1.d).
30150 void
30151 Inst_VOP3__V_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
30152 {
30153 Wavefront *wf = gpuDynInst->wavefront();
30154 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
30155 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
30156 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30157
30158 src0.readSrc();
30159 src1.readSrc();
30160
30161 if (instData.ABS & 0x1) {
30162 src0.absModifier();
30163 }
30164
30165 if (instData.ABS & 0x2) {
30166 src1.absModifier();
30167 }
30168
30169 if (extData.NEG & 0x1) {
30170 src0.negModifier();
30171 }
30172
30173 if (extData.NEG & 0x2) {
30174 src1.negModifier();
30175 }
30176
30177 /**
30178 * input modifiers are supported by FP operations only
30179 */
30180 assert(!(instData.ABS & 0x4));
30181 assert(!(extData.NEG & 0x4));
30182
30183 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30184 if (wf->execMask(lane)) {
30185 vdst[lane] = std::fmin(src0[lane], src1[lane]);
30186 }
30187 }
30188
30189 vdst.write();
30190 }
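
// Note: std::fmin (and std::fmax in v_max_f64 below) return the
// non-NaN operand when exactly one input is NaN, so unlike the explicit
// NaN checks in the f64 add and mul above, a single NaN input here does
// not force a NaN result.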
30191
30192 Inst_VOP3__V_MAX_F64::Inst_VOP3__V_MAX_F64(InFmt_VOP3 *iFmt)
30193 : Inst_VOP3(iFmt, "v_max_f64", false)
30194 {
30195 setFlag(ALU);
30196 setFlag(F64);
30197 } // Inst_VOP3__V_MAX_F64
30198
30199 Inst_VOP3__V_MAX_F64::~Inst_VOP3__V_MAX_F64()
30200 {
30201 } // ~Inst_VOP3__V_MAX_F64
30202
30203 // D.d = max(S0.d, S1.d).
30204 void
30205 Inst_VOP3__V_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
30206 {
30207 Wavefront *wf = gpuDynInst->wavefront();
30208 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
30209 ConstVecOperandF64 src1(gpuDynInst, extData.SRC1);
30210 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30211
30212 src0.readSrc();
30213 src1.readSrc();
30214
30215 if (instData.ABS & 0x1) {
30216 src0.absModifier();
30217 }
30218
30219 if (instData.ABS & 0x2) {
30220 src1.absModifier();
30221 }
30222
30223 if (extData.NEG & 0x1) {
30224 src0.negModifier();
30225 }
30226
30227 if (extData.NEG & 0x2) {
30228 src1.negModifier();
30229 }
30230
30231 /**
30232 * input modifiers are supported by FP operations only
30233 */
30234 assert(!(instData.ABS & 0x4));
30235 assert(!(extData.NEG & 0x4));
30236
30237 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30238 if (wf->execMask(lane)) {
30239 vdst[lane] = std::fmax(src0[lane], src1[lane]);
30240 }
30241 }
30242
30243 vdst.write();
30244 }
30245
30246 Inst_VOP3__V_LDEXP_F64::Inst_VOP3__V_LDEXP_F64(InFmt_VOP3 *iFmt)
30247 : Inst_VOP3(iFmt, "v_ldexp_f64", false)
30248 {
30249 setFlag(ALU);
30250 setFlag(F64);
30251 } // Inst_VOP3__V_LDEXP_F64
30252
30253 Inst_VOP3__V_LDEXP_F64::~Inst_VOP3__V_LDEXP_F64()
30254 {
30255 } // ~Inst_VOP3__V_LDEXP_F64
30256
// D.d = S0.d * 2^S1.i[31:0] (ldexp).
30258 void
30259 Inst_VOP3__V_LDEXP_F64::execute(GPUDynInstPtr gpuDynInst)
30260 {
30261 Wavefront *wf = gpuDynInst->wavefront();
30262 ConstVecOperandF64 src0(gpuDynInst, extData.SRC0);
ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
30264 VecOperandF64 vdst(gpuDynInst, instData.VDST);
30265
30266 src0.readSrc();
30267 src1.readSrc();
30268
30269 if (instData.ABS & 0x1) {
30270 src0.absModifier();
30271 }
30272
30273 if (extData.NEG & 0x1) {
30274 src0.negModifier();
30275 }
30276
30277 /**
30278 * input modifiers are supported by FP operations only
30279 */
30280 assert(!(instData.ABS & 0x2));
30281 assert(!(instData.ABS & 0x4));
30282 assert(!(extData.NEG & 0x2));
30283 assert(!(extData.NEG & 0x4));
30284
30285 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30286 if (wf->execMask(lane)) {
30287 if (std::isnan(src0[lane]) || std::isinf(src0[lane])) {
30288 vdst[lane] = src0[lane];
30289 } else if (std::fpclassify(src0[lane]) == FP_SUBNORMAL
30290 || std::fpclassify(src0[lane]) == FP_ZERO) {
30291 if (std::signbit(src0[lane])) {
30292 vdst[lane] = -0.0;
30293 } else {
30294 vdst[lane] = +0.0;
30295 }
30296 } else {
30297 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
30298 }
30299 }
30300 }
30301
30302 vdst.write();
30303 }
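
// For example, ldexp(0.75, 4) = 0.75 * 2^4 = 12.0, and negative
// exponents scale down: ldexp(1.0, -2) = 0.25.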
30304
30305 Inst_VOP3__V_MUL_LO_U32::Inst_VOP3__V_MUL_LO_U32(InFmt_VOP3 *iFmt)
30306 : Inst_VOP3(iFmt, "v_mul_lo_u32", false)
30307 {
30308 setFlag(ALU);
30309 } // Inst_VOP3__V_MUL_LO_U32
30310
30311 Inst_VOP3__V_MUL_LO_U32::~Inst_VOP3__V_MUL_LO_U32()
30312 {
30313 } // ~Inst_VOP3__V_MUL_LO_U32
30314
30315 // D.u = S0.u * S1.u.
30316 void
30317 Inst_VOP3__V_MUL_LO_U32::execute(GPUDynInstPtr gpuDynInst)
30318 {
30319 Wavefront *wf = gpuDynInst->wavefront();
30320 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30321 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30322 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30323
30324 src0.readSrc();
30325 src1.readSrc();
30326
30327 /**
30328 * input modifiers are supported by FP operations only
30329 */
30330 assert(!(instData.ABS & 0x1));
30331 assert(!(instData.ABS & 0x2));
30332 assert(!(instData.ABS & 0x4));
30333 assert(!(extData.NEG & 0x1));
30334 assert(!(extData.NEG & 0x2));
30335 assert(!(extData.NEG & 0x4));
30336
30337 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30338 if (wf->execMask(lane)) {
30339 VecElemI64 s0 = (VecElemI64)src0[lane];
30340 VecElemI64 s1 = (VecElemI64)src1[lane];
30341 vdst[lane] = (VecElemU32)((s0 * s1) & 0xffffffffLL);
30342 }
30343 }
30344
30345 vdst.write();
30346 }
30347
30348 Inst_VOP3__V_MUL_HI_U32::Inst_VOP3__V_MUL_HI_U32(InFmt_VOP3 *iFmt)
30349 : Inst_VOP3(iFmt, "v_mul_hi_u32", false)
30350 {
30351 setFlag(ALU);
30352 } // Inst_VOP3__V_MUL_HI_U32
30353
30354 Inst_VOP3__V_MUL_HI_U32::~Inst_VOP3__V_MUL_HI_U32()
30355 {
30356 } // ~Inst_VOP3__V_MUL_HI_U32
30357
30358 // D.u = (S0.u * S1.u) >> 32.
30359 void
30360 Inst_VOP3__V_MUL_HI_U32::execute(GPUDynInstPtr gpuDynInst)
30361 {
30362 Wavefront *wf = gpuDynInst->wavefront();
30363 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30364 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30365 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30366
30367 src0.readSrc();
30368 src1.readSrc();
30369
30370 /**
30371 * input modifiers are supported by FP operations only
30372 */
30373 assert(!(instData.ABS & 0x1));
30374 assert(!(instData.ABS & 0x2));
30375 assert(!(instData.ABS & 0x4));
30376 assert(!(extData.NEG & 0x1));
30377 assert(!(extData.NEG & 0x2));
30378 assert(!(extData.NEG & 0x4));
30379
30380 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30381 if (wf->execMask(lane)) {
30382 VecElemI64 s0 = (VecElemI64)src0[lane];
30383 VecElemI64 s1 = (VecElemI64)src1[lane];
30384 vdst[lane]
30385 = (VecElemU32)(((s0 * s1) >> 32) & 0xffffffffLL);
30386 }
30387 }
30388
30389 vdst.write();
30390 }
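
// v_mul_lo_u32 and v_mul_hi_u32 together form a full 32 x 32 -> 64
// unsigned multiply: full = ((U64)hi << 32) | lo. For example,
// 0x80000000 * 0x80000000 gives lo = 0x0 and hi = 0x40000000.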
30391
30392 Inst_VOP3__V_MUL_HI_I32::Inst_VOP3__V_MUL_HI_I32(InFmt_VOP3 *iFmt)
30393 : Inst_VOP3(iFmt, "v_mul_hi_i32", false)
30394 {
30395 setFlag(ALU);
30396 } // Inst_VOP3__V_MUL_HI_I32
30397
30398 Inst_VOP3__V_MUL_HI_I32::~Inst_VOP3__V_MUL_HI_I32()
30399 {
30400 } // ~Inst_VOP3__V_MUL_HI_I32
30401
30402 // D.i = (S0.i * S1.i) >> 32.
30403 void
30404 Inst_VOP3__V_MUL_HI_I32::execute(GPUDynInstPtr gpuDynInst)
30405 {
30406 Wavefront *wf = gpuDynInst->wavefront();
30407 ConstVecOperandI32 src0(gpuDynInst, extData.SRC0);
30408 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
30409 VecOperandI32 vdst(gpuDynInst, instData.VDST);
30410
30411 src0.readSrc();
30412 src1.readSrc();
30413
30414 /**
30415 * input modifiers are supported by FP operations only
30416 */
30417 assert(!(instData.ABS & 0x1));
30418 assert(!(instData.ABS & 0x2));
30419 assert(!(instData.ABS & 0x4));
30420 assert(!(extData.NEG & 0x1));
30421 assert(!(extData.NEG & 0x2));
30422 assert(!(extData.NEG & 0x4));
30423
30424 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30425 if (wf->execMask(lane)) {
30426 VecElemI64 s0 = (VecElemI64)src0[lane];
30427 VecElemI64 s1 = (VecElemI64)src1[lane];
30428 vdst[lane]
30429 = (VecElemI32)(((s0 * s1) >> 32LL) & 0xffffffffLL);
30430 }
30431 }
30432
30433 vdst.write();
30434 }
30435
30436 Inst_VOP3__V_LDEXP_F32::Inst_VOP3__V_LDEXP_F32(InFmt_VOP3 *iFmt)
30437 : Inst_VOP3(iFmt, "v_ldexp_f32", false)
30438 {
30439 setFlag(ALU);
30440 setFlag(F32);
30441 } // Inst_VOP3__V_LDEXP_F32
30442
30443 Inst_VOP3__V_LDEXP_F32::~Inst_VOP3__V_LDEXP_F32()
30444 {
30445 } // ~Inst_VOP3__V_LDEXP_F32
30446
// D.f = S0.f * 2^S1.i (ldexp).
30448 void
30449 Inst_VOP3__V_LDEXP_F32::execute(GPUDynInstPtr gpuDynInst)
30450 {
30451 Wavefront *wf = gpuDynInst->wavefront();
30452 ConstVecOperandF32 src0(gpuDynInst, extData.SRC0);
30453 ConstVecOperandI32 src1(gpuDynInst, extData.SRC1);
30454 VecOperandF32 vdst(gpuDynInst, instData.VDST);
30455
30456 src0.readSrc();
30457 src1.readSrc();
30458
30459 /**
30460 * input modifiers are supported by FP operations only
30461 */
30462 assert(!(instData.ABS & 0x2));
30463 assert(!(instData.ABS & 0x4));
30464 assert(!(extData.NEG & 0x2));
30465 assert(!(extData.NEG & 0x4));
30466
30467 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30468 if (wf->execMask(lane)) {
30469 vdst[lane] = std::ldexp(src0[lane], src1[lane]);
30470 }
30471 }
30472
30473 vdst.write();
30474 }
30475
30476 Inst_VOP3__V_READLANE_B32::Inst_VOP3__V_READLANE_B32(InFmt_VOP3 *iFmt)
30477 : Inst_VOP3(iFmt, "v_readlane_b32", true)
30478 {
30479 setFlag(ALU);
30480 setFlag(IgnoreExec);
30481 } // Inst_VOP3__V_READLANE_B32
30482
30483 Inst_VOP3__V_READLANE_B32::~Inst_VOP3__V_READLANE_B32()
30484 {
30485 } // ~Inst_VOP3__V_READLANE_B32
30486
30487 // Copy one VGPR value to one SGPR. D = SGPR-dest, S0 = Source Data (VGPR#
30488 // or M0(lds-direct)), S1 = Lane Select (SGPR or M0). Ignores exec mask.
30489 // Input and output modifiers not supported; this is an untyped operation.
30490 void
30491 Inst_VOP3__V_READLANE_B32::execute(GPUDynInstPtr gpuDynInst)
30492 {
30493 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30494 ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
30495 ScalarOperandU32 sdst(gpuDynInst, instData.VDST);
30496
30497 src0.readSrc();
30498 src1.read();
30499
30500 /**
30501 * input modifiers are supported by FP operations only
30502 */
30503 assert(!(instData.ABS & 0x1));
30504 assert(!(instData.ABS & 0x2));
30505 assert(!(instData.ABS & 0x4));
30506 assert(!(extData.NEG & 0x1));
30507 assert(!(extData.NEG & 0x2));
30508 assert(!(extData.NEG & 0x4));
30509
30510 sdst = src0[src1.rawData() & 0x3f];
30511
30512 sdst.write();
30513 }
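
// Typical use (illustrative asm; the lane select is taken mod 64 by the
// & 0x3f above):
//   v_readlane_b32 s0, v1, 5   // s0 = lane 5's copy of v1, EXEC ignored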
30514
30515 Inst_VOP3__V_WRITELANE_B32::Inst_VOP3__V_WRITELANE_B32(InFmt_VOP3 *iFmt)
30516 : Inst_VOP3(iFmt, "v_writelane_b32", false)
30517 {
30518 setFlag(ALU);
30519 setFlag(IgnoreExec);
30520 } // Inst_VOP3__V_WRITELANE_B32
30521
30522 Inst_VOP3__V_WRITELANE_B32::~Inst_VOP3__V_WRITELANE_B32()
30523 {
30524 } // ~Inst_VOP3__V_WRITELANE_B32
30525
30526 // Write value into one VGPR in one lane. D = VGPR-dest, S0 = Source Data
30527 // (sgpr, m0, exec or constants), S1 = Lane Select (SGPR or M0). Ignores
30528 // exec mask. Input and output modifiers not supported; this is an untyped
30529 // operation.
30530 void
30531 Inst_VOP3__V_WRITELANE_B32::execute(GPUDynInstPtr gpuDynInst)
30532 {
30533 ConstScalarOperandU32 src0(gpuDynInst, extData.SRC0);
30534 ConstScalarOperandU32 src1(gpuDynInst, extData.SRC1);
30535 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30536
30537 src0.read();
30538 src1.read();
30539 vdst.read();
30540
30541 /**
30542 * input modifiers are supported by FP operations only
30543 */
30544 assert(!(instData.ABS & 0x1));
30545 assert(!(instData.ABS & 0x2));
30546 assert(!(instData.ABS & 0x4));
30547 assert(!(extData.NEG & 0x1));
30548 assert(!(extData.NEG & 0x2));
30549 assert(!(extData.NEG & 0x4));
30550
30551 vdst[src1.rawData() & 0x3f] = src0.rawData();
30552
30553 vdst.write();
30554 }
30555
30556 Inst_VOP3__V_BCNT_U32_B32::Inst_VOP3__V_BCNT_U32_B32(InFmt_VOP3 *iFmt)
30557 : Inst_VOP3(iFmt, "v_bcnt_u32_b32", false)
30558 {
30559 setFlag(ALU);
30560 } // Inst_VOP3__V_BCNT_U32_B32
30561
30562 Inst_VOP3__V_BCNT_U32_B32::~Inst_VOP3__V_BCNT_U32_B32()
30563 {
30564 } // ~Inst_VOP3__V_BCNT_U32_B32
30565
30566 // D.u = CountOneBits(S0.u) + S1.u. Bit count.
30567 void
30568 Inst_VOP3__V_BCNT_U32_B32::execute(GPUDynInstPtr gpuDynInst)
30569 {
30570 Wavefront *wf = gpuDynInst->wavefront();
30571 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30572 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30573 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30574
30575 src0.readSrc();
30576 src1.readSrc();
30577
30578 /**
30579 * input modifiers are supported by FP operations only
30580 */
30581 assert(!(instData.ABS & 0x1));
30582 assert(!(instData.ABS & 0x2));
30583 assert(!(instData.ABS & 0x4));
30584 assert(!(extData.NEG & 0x1));
30585 assert(!(extData.NEG & 0x2));
30586 assert(!(extData.NEG & 0x4));
30587
30588 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30589 if (wf->execMask(lane)) {
30590 vdst[lane] = popCount(src0[lane]) + src1[lane];
30591 }
30592 }
30593
30594 vdst.write();
30595 }
30596
30597 Inst_VOP3__V_MBCNT_LO_U32_B32::Inst_VOP3__V_MBCNT_LO_U32_B32(
30598 InFmt_VOP3 *iFmt)
30599 : Inst_VOP3(iFmt, "v_mbcnt_lo_u32_b32", false)
30600 {
30601 setFlag(ALU);
30602 } // Inst_VOP3__V_MBCNT_LO_U32_B32
30603
30604 Inst_VOP3__V_MBCNT_LO_U32_B32::~Inst_VOP3__V_MBCNT_LO_U32_B32()
30605 {
30606 } // ~Inst_VOP3__V_MBCNT_LO_U32_B32
30607
// ThreadMask = (1 << ThreadPosition) - 1;
// D.u = CountOneBits(S0.u & ThreadMask[31:0]) + S1.u.
// Masked bit count, ThreadPosition is the position of this thread in the
// wavefront (in 0..63).
30610 void
30611 Inst_VOP3__V_MBCNT_LO_U32_B32::execute(GPUDynInstPtr gpuDynInst)
30612 {
30613 Wavefront *wf = gpuDynInst->wavefront();
30614 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30615 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30616 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30617 uint64_t threadMask = 0;
30618
30619 src0.readSrc();
30620 src1.readSrc();
30621
30622 /**
30623 * input modifiers are supported by FP operations only
30624 */
30625 assert(!(instData.ABS & 0x1));
30626 assert(!(instData.ABS & 0x2));
30627 assert(!(instData.ABS & 0x4));
30628 assert(!(extData.NEG & 0x1));
30629 assert(!(extData.NEG & 0x2));
30630 assert(!(extData.NEG & 0x4));
30631
30632 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30633 if (wf->execMask(lane)) {
30634 threadMask = ((1LL << lane) - 1LL);
30635 vdst[lane] = popCount(src0[lane] & bits(threadMask, 31, 0)) +
30636 src1[lane];
30637 }
30638 }
30639
30640 vdst.write();
30641 } // execute
30642 // --- Inst_VOP3__V_MBCNT_HI_U32_B32 class methods ---
30643
30644 Inst_VOP3__V_MBCNT_HI_U32_B32::Inst_VOP3__V_MBCNT_HI_U32_B32(
30645 InFmt_VOP3 *iFmt)
30646 : Inst_VOP3(iFmt, "v_mbcnt_hi_u32_b32", false)
30647 {
30648 setFlag(ALU);
30649 } // Inst_VOP3__V_MBCNT_HI_U32_B32
30650
30651 Inst_VOP3__V_MBCNT_HI_U32_B32::~Inst_VOP3__V_MBCNT_HI_U32_B32()
30652 {
30653 } // ~Inst_VOP3__V_MBCNT_HI_U32_B32
30654
30655 // ThreadMask = (1 << ThreadPosition) - 1;
30656 // D.u = CountOneBits(S0.u & ThreadMask[63:32]) + S1.u.
30657 // Masked bit count, ThreadPosition is the position of this thread in the
30658 // wavefront (in 0..63).
30659 void
30660 Inst_VOP3__V_MBCNT_HI_U32_B32::execute(GPUDynInstPtr gpuDynInst)
30661 {
30662 Wavefront *wf = gpuDynInst->wavefront();
30663 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30664 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30665 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30666 uint64_t threadMask = 0;
30667
30668 src0.readSrc();
30669 src1.readSrc();
30670
30671 /**
30672 * input modifiers are supported by FP operations only
30673 */
30674 assert(!(instData.ABS & 0x1));
30675 assert(!(instData.ABS & 0x2));
30676 assert(!(instData.ABS & 0x4));
30677 assert(!(extData.NEG & 0x1));
30678 assert(!(extData.NEG & 0x2));
30679 assert(!(extData.NEG & 0x4));
30680
30681 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30682 if (wf->execMask(lane)) {
30683 threadMask = ((1LL << lane) - 1LL);
30684 vdst[lane] = popCount(src0[lane] & bits(threadMask, 63, 32)) +
30685 src1[lane];
30686 }
30687 }
30688
30689 vdst.write();
30690 } // execute
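
// The usual pairing (illustrative asm) computes each lane's rank among
// the active lanes:
//   v_mbcnt_lo_u32_b32 v0, exec_lo, 0
//   v_mbcnt_hi_u32_b32 v0, exec_hi, v0
// after which v0 holds, per lane, the number of active lanes below it.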
30691 // --- Inst_VOP3__V_LSHLREV_B64 class methods ---
30692
30693 Inst_VOP3__V_LSHLREV_B64::Inst_VOP3__V_LSHLREV_B64(InFmt_VOP3 *iFmt)
30694 : Inst_VOP3(iFmt, "v_lshlrev_b64", false)
30695 {
30696 setFlag(ALU);
30697 } // Inst_VOP3__V_LSHLREV_B64
30698
30699 Inst_VOP3__V_LSHLREV_B64::~Inst_VOP3__V_LSHLREV_B64()
30700 {
30701 } // ~Inst_VOP3__V_LSHLREV_B64
30702
30703 // D.u64 = S1.u64 << S0.u[5:0].
30704 void
30705 Inst_VOP3__V_LSHLREV_B64::execute(GPUDynInstPtr gpuDynInst)
30706 {
30707 Wavefront *wf = gpuDynInst->wavefront();
30708 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30709 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
30710 VecOperandU64 vdst(gpuDynInst, instData.VDST);
30711
30712 src0.readSrc();
30713 src1.readSrc();
30714
30715 /**
30716 * input modifiers are supported by FP operations only
30717 */
30718 assert(!(instData.ABS & 0x1));
30719 assert(!(instData.ABS & 0x2));
30720 assert(!(instData.ABS & 0x4));
30721 assert(!(extData.NEG & 0x1));
30722 assert(!(extData.NEG & 0x2));
30723 assert(!(extData.NEG & 0x4));
30724
30725 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30726 if (wf->execMask(lane)) {
30727 vdst[lane] = src1[lane] << bits(src0[lane], 5, 0);
30728 }
30729 }
30730
30731 vdst.write();
30732 }
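
// The "rev" suffix means the operands are reversed relative to the
// non-rev shifts: the shift amount is src0 and the value shifted is
// src1. This mirrors the VOP2 encodings, where only src0 can hold an
// inline constant, e.g. (illustrative asm):
//   v_lshlrev_b64 v[0:1], 2, v[2:3]   // v[0:1] = v[2:3] << 2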
30733
30734 Inst_VOP3__V_LSHRREV_B64::Inst_VOP3__V_LSHRREV_B64(InFmt_VOP3 *iFmt)
30735 : Inst_VOP3(iFmt, "v_lshrrev_b64", false)
30736 {
30737 setFlag(ALU);
30738 } // Inst_VOP3__V_LSHRREV_B64
30739
30740 Inst_VOP3__V_LSHRREV_B64::~Inst_VOP3__V_LSHRREV_B64()
30741 {
30742 } // ~Inst_VOP3__V_LSHRREV_B64
30743
30744 // D.u64 = S1.u64 >> S0.u[5:0].
30745 // The vacated bits are set to zero.
30746 void
30747 Inst_VOP3__V_LSHRREV_B64::execute(GPUDynInstPtr gpuDynInst)
30748 {
30749 Wavefront *wf = gpuDynInst->wavefront();
30750 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30751 ConstVecOperandU64 src1(gpuDynInst, extData.SRC1);
30752 VecOperandU64 vdst(gpuDynInst, instData.VDST);
30753
30754 src0.readSrc();
30755 src1.readSrc();
30756
30757 /**
30758 * input modifiers are supported by FP operations only
30759 */
30760 assert(!(instData.ABS & 0x1));
30761 assert(!(instData.ABS & 0x2));
30762 assert(!(instData.ABS & 0x4));
30763 assert(!(extData.NEG & 0x1));
30764 assert(!(extData.NEG & 0x2));
30765 assert(!(extData.NEG & 0x4));
30766
30767 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30768 if (wf->execMask(lane)) {
30769 vdst[lane] = src1[lane] >> bits(src0[lane], 5, 0);
30770 }
30771 }
30772
30773 vdst.write();
30774 }
30775
30776 Inst_VOP3__V_ASHRREV_I64::Inst_VOP3__V_ASHRREV_I64(InFmt_VOP3 *iFmt)
30777 : Inst_VOP3(iFmt, "v_ashrrev_i64", false)
30778 {
30779 setFlag(ALU);
30780 } // Inst_VOP3__V_ASHRREV_I64
30781
30782 Inst_VOP3__V_ASHRREV_I64::~Inst_VOP3__V_ASHRREV_I64()
30783 {
30784 } // ~Inst_VOP3__V_ASHRREV_I64
30785
30786 // D.u64 = signext(S1.u64) >> S0.u[5:0].
30787 // The vacated bits are set to the sign bit of the input value.
30788 void
30789 Inst_VOP3__V_ASHRREV_I64::execute(GPUDynInstPtr gpuDynInst)
30790 {
30791 Wavefront *wf = gpuDynInst->wavefront();
30792 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30793 ConstVecOperandI64 src1(gpuDynInst, extData.SRC1);
30794 VecOperandU64 vdst(gpuDynInst, instData.VDST);
30795
30796 src0.readSrc();
30797 src1.readSrc();
30798
30799 /**
30800 * input modifiers are supported by FP operations only
30801 */
30802 assert(!(instData.ABS & 0x1));
30803 assert(!(instData.ABS & 0x2));
30804 assert(!(instData.ABS & 0x4));
30805 assert(!(extData.NEG & 0x1));
30806 assert(!(extData.NEG & 0x2));
30807 assert(!(extData.NEG & 0x4));
30808
30809 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30810 if (wf->execMask(lane)) {
30811 vdst[lane]
30812 = src1[lane] >> bits(src0[lane], 5, 0);
30813 }
30814 }
30815
30816 vdst.write();
30817 }
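
// Note: the arithmetic (sign-filling) behavior comes from shifting the
// signed I64 source; the result is simply stored back through the U64
// destination view.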
30818
30819 Inst_VOP3__V_TRIG_PREOP_F64::Inst_VOP3__V_TRIG_PREOP_F64(InFmt_VOP3 *iFmt)
30820 : Inst_VOP3(iFmt, "v_trig_preop_f64", false)
30821 {
30822 setFlag(ALU);
30823 setFlag(F64);
30824 } // Inst_VOP3__V_TRIG_PREOP_F64
30825
30826 Inst_VOP3__V_TRIG_PREOP_F64::~Inst_VOP3__V_TRIG_PREOP_F64()
30827 {
30828 } // ~Inst_VOP3__V_TRIG_PREOP_F64
30829
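// Rough semantics (unimplemented below): D.d = a segment of 2/PI
// selected by S1.u, as used for large-argument trigonometric range
// reduction.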
30830 void
30831 Inst_VOP3__V_TRIG_PREOP_F64::execute(GPUDynInstPtr gpuDynInst)
30832 {
30833 panicUnimplemented();
30834 }
30835
30836 Inst_VOP3__V_BFM_B32::Inst_VOP3__V_BFM_B32(InFmt_VOP3 *iFmt)
30837 : Inst_VOP3(iFmt, "v_bfm_b32", false)
30838 {
30839 setFlag(ALU);
30840 } // Inst_VOP3__V_BFM_B32
30841
30842 Inst_VOP3__V_BFM_B32::~Inst_VOP3__V_BFM_B32()
30843 {
30844 } // ~Inst_VOP3__V_BFM_B32
30845
30846 // D.u = ((1 << S0.u[4:0]) - 1) << S1.u[4:0];
30847 void
30848 Inst_VOP3__V_BFM_B32::execute(GPUDynInstPtr gpuDynInst)
30849 {
30850 Wavefront *wf = gpuDynInst->wavefront();
30851 ConstVecOperandU32 src0(gpuDynInst, extData.SRC0);
30852 ConstVecOperandU32 src1(gpuDynInst, extData.SRC1);
30853 VecOperandU32 vdst(gpuDynInst, instData.VDST);
30854
30855 src0.readSrc();
30856 src1.readSrc();
30857
30858 /**
30859 * input modifiers are supported by FP operations only
30860 */
30861 assert(!(instData.ABS & 0x1));
30862 assert(!(instData.ABS & 0x2));
30863 assert(!(instData.ABS & 0x4));
30864 assert(!(extData.NEG & 0x1));
30865 assert(!(extData.NEG & 0x2));
30866 assert(!(extData.NEG & 0x4));
30867
30868 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
30869 if (wf->execMask(lane)) {
30870 vdst[lane] = ((1 << bits(src0[lane], 4, 0)) - 1)
30871 << bits(src1[lane], 4, 0);
30872 }
30873 }
30874
30875 vdst.write();
30876 }
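
// For example, S0 = 4, S1 = 8 gives ((1 << 4) - 1) << 8 = 0x00000f00:
// a 4-bit field mask positioned at bit 8.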
30877
30878 Inst_VOP3__V_CVT_PKNORM_I16_F32::Inst_VOP3__V_CVT_PKNORM_I16_F32(
30879 InFmt_VOP3 *iFmt)
30880 : Inst_VOP3(iFmt, "v_cvt_pknorm_i16_f32", false)
30881 {
30882 setFlag(ALU);
30883 setFlag(F32);
30884 } // Inst_VOP3__V_CVT_PKNORM_I16_F32
30885
30886 Inst_VOP3__V_CVT_PKNORM_I16_F32::~Inst_VOP3__V_CVT_PKNORM_I16_F32()
30887 {
30888 } // ~Inst_VOP3__V_CVT_PKNORM_I16_F32
30889
30890 // D = {(snorm)S1.f, (snorm)S0.f}.
30891 void
30892 Inst_VOP3__V_CVT_PKNORM_I16_F32::execute(GPUDynInstPtr gpuDynInst)
30893 {
30894 panicUnimplemented();
30895 }
30896
30897 Inst_VOP3__V_CVT_PKNORM_U16_F32::Inst_VOP3__V_CVT_PKNORM_U16_F32(
30898 InFmt_VOP3 *iFmt)
30899 : Inst_VOP3(iFmt, "v_cvt_pknorm_u16_f32", false)
30900 {
30901 setFlag(ALU);
30902 setFlag(F32);
30903 } // Inst_VOP3__V_CVT_PKNORM_U16_F32
30904
30905 Inst_VOP3__V_CVT_PKNORM_U16_F32::~Inst_VOP3__V_CVT_PKNORM_U16_F32()
30906 {
30907 } // ~Inst_VOP3__V_CVT_PKNORM_U16_F32
30908
30909 // D = {(unorm)S1.f, (unorm)S0.f}.
30910 void
30911 Inst_VOP3__V_CVT_PKNORM_U16_F32::execute(GPUDynInstPtr gpuDynInst)
30912 {
30913 panicUnimplemented();
30914 }
30915
30916 Inst_VOP3__V_CVT_PKRTZ_F16_F32::Inst_VOP3__V_CVT_PKRTZ_F16_F32(
30917 InFmt_VOP3 *iFmt)
30918 : Inst_VOP3(iFmt, "v_cvt_pkrtz_f16_f32", false)
30919 {
30920 setFlag(ALU);
30921 setFlag(F32);
30922 } // Inst_VOP3__V_CVT_PKRTZ_F16_F32
30923
30924 Inst_VOP3__V_CVT_PKRTZ_F16_F32::~Inst_VOP3__V_CVT_PKRTZ_F16_F32()
30925 {
30926 } // ~Inst_VOP3__V_CVT_PKRTZ_F16_F32
30927
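// D = {f32_to_f16(S1.f), f32_to_f16(S0.f)}, converted with
// round-toward-zero regardless of the current rounding mode.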
30928 void
30929 Inst_VOP3__V_CVT_PKRTZ_F16_F32::execute(GPUDynInstPtr gpuDynInst)
30930 {
30931 panicUnimplemented();
30932 }
30933
30934 Inst_VOP3__V_CVT_PK_U16_U32::Inst_VOP3__V_CVT_PK_U16_U32(InFmt_VOP3 *iFmt)
30935 : Inst_VOP3(iFmt, "v_cvt_pk_u16_u32", false)
30936 {
30937 setFlag(ALU);
30938 } // Inst_VOP3__V_CVT_PK_U16_U32
30939
30940 Inst_VOP3__V_CVT_PK_U16_U32::~Inst_VOP3__V_CVT_PK_U16_U32()
30941 {
30942 } // ~Inst_VOP3__V_CVT_PK_U16_U32
30943
30944 // D = {uint32_to_uint16(S1.u), uint32_to_uint16(S0.u)}.
30945 void
30946 Inst_VOP3__V_CVT_PK_U16_U32::execute(GPUDynInstPtr gpuDynInst)
30947 {
30948 panicUnimplemented();
30949 }
30950
30951 Inst_VOP3__V_CVT_PK_I16_I32::Inst_VOP3__V_CVT_PK_I16_I32(InFmt_VOP3 *iFmt)
30952 : Inst_VOP3(iFmt, "v_cvt_pk_i16_i32", false)
30953 {
30954 setFlag(ALU);
30955 } // Inst_VOP3__V_CVT_PK_I16_I32
30956
30957 Inst_VOP3__V_CVT_PK_I16_I32::~Inst_VOP3__V_CVT_PK_I16_I32()
30958 {
30959 } // ~Inst_VOP3__V_CVT_PK_I16_I32
30960
30961 // D = {int32_to_int16(S1.i), int32_to_int16(S0.i)}.
30962 void
30963 Inst_VOP3__V_CVT_PK_I16_I32::execute(GPUDynInstPtr gpuDynInst)
30964 {
30965 panicUnimplemented();
30966 }
30967
30968 Inst_DS__DS_ADD_U32::Inst_DS__DS_ADD_U32(InFmt_DS *iFmt)
30969 : Inst_DS(iFmt, "ds_add_u32")
30970 {
30971 } // Inst_DS__DS_ADD_U32
30972
30973 Inst_DS__DS_ADD_U32::~Inst_DS__DS_ADD_U32()
30974 {
30975 } // ~Inst_DS__DS_ADD_U32
30976
30977 // tmp = MEM[ADDR];
30978 // MEM[ADDR] += DATA;
30979 // RETURN_DATA = tmp.
30980 void
30981 Inst_DS__DS_ADD_U32::execute(GPUDynInstPtr gpuDynInst)
30982 {
30983 panicUnimplemented();
30984 }
30985
30986 Inst_DS__DS_SUB_U32::Inst_DS__DS_SUB_U32(InFmt_DS *iFmt)
30987 : Inst_DS(iFmt, "ds_sub_u32")
30988 {
30989 } // Inst_DS__DS_SUB_U32
30990
30991 Inst_DS__DS_SUB_U32::~Inst_DS__DS_SUB_U32()
30992 {
30993 } // ~Inst_DS__DS_SUB_U32
30994
30995 // tmp = MEM[ADDR];
30996 // MEM[ADDR] -= DATA;
30997 // RETURN_DATA = tmp.
30998 void
30999 Inst_DS__DS_SUB_U32::execute(GPUDynInstPtr gpuDynInst)
31000 {
31001 panicUnimplemented();
31002 }
31003
31004 Inst_DS__DS_RSUB_U32::Inst_DS__DS_RSUB_U32(InFmt_DS *iFmt)
31005 : Inst_DS(iFmt, "ds_rsub_u32")
31006 {
31007 } // Inst_DS__DS_RSUB_U32
31008
31009 Inst_DS__DS_RSUB_U32::~Inst_DS__DS_RSUB_U32()
31010 {
31011 } // ~Inst_DS__DS_RSUB_U32
31012
31013 // tmp = MEM[ADDR];
31014 // MEM[ADDR] = DATA - MEM[ADDR];
31015 // RETURN_DATA = tmp.
31016 // Subtraction with reversed operands.
31017 void
31018 Inst_DS__DS_RSUB_U32::execute(GPUDynInstPtr gpuDynInst)
31019 {
31020 panicUnimplemented();
31021 }
31022
31023 Inst_DS__DS_INC_U32::Inst_DS__DS_INC_U32(InFmt_DS *iFmt)
31024 : Inst_DS(iFmt, "ds_inc_u32")
31025 {
31026 } // Inst_DS__DS_INC_U32
31027
31028 Inst_DS__DS_INC_U32::~Inst_DS__DS_INC_U32()
31029 {
31030 } // ~Inst_DS__DS_INC_U32
31031
31032 // tmp = MEM[ADDR];
31033 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
31034 // RETURN_DATA = tmp.
31035 void
31036 Inst_DS__DS_INC_U32::execute(GPUDynInstPtr gpuDynInst)
31037 {
31038 panicUnimplemented();
31039 }
31040
31041 Inst_DS__DS_DEC_U32::Inst_DS__DS_DEC_U32(InFmt_DS *iFmt)
31042 : Inst_DS(iFmt, "ds_dec_u32")
31043 {
31044 } // Inst_DS__DS_DEC_U32
31045
31046 Inst_DS__DS_DEC_U32::~Inst_DS__DS_DEC_U32()
31047 {
31048 } // ~Inst_DS__DS_DEC_U32
31049
31050 // tmp = MEM[ADDR];
31051 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
31052 // (unsigned compare); RETURN_DATA = tmp.
31053 void
31054 Inst_DS__DS_DEC_U32::execute(GPUDynInstPtr gpuDynInst)
31055 {
31056 panicUnimplemented();
31057 }
31058
31059 Inst_DS__DS_MIN_I32::Inst_DS__DS_MIN_I32(InFmt_DS *iFmt)
31060 : Inst_DS(iFmt, "ds_min_i32")
31061 {
31062 } // Inst_DS__DS_MIN_I32
31063
31064 Inst_DS__DS_MIN_I32::~Inst_DS__DS_MIN_I32()
31065 {
31066 } // ~Inst_DS__DS_MIN_I32
31067
31068 // tmp = MEM[ADDR];
31069 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
31070 // RETURN_DATA = tmp.
31071 void
31072 Inst_DS__DS_MIN_I32::execute(GPUDynInstPtr gpuDynInst)
31073 {
31074 panicUnimplemented();
31075 }
31076
31077 Inst_DS__DS_MAX_I32::Inst_DS__DS_MAX_I32(InFmt_DS *iFmt)
31078 : Inst_DS(iFmt, "ds_max_i32")
31079 {
31080 } // Inst_DS__DS_MAX_I32
31081
31082 Inst_DS__DS_MAX_I32::~Inst_DS__DS_MAX_I32()
31083 {
31084 } // ~Inst_DS__DS_MAX_I32
31085
31086 // tmp = MEM[ADDR];
31087 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
31088 // RETURN_DATA = tmp.
31089 void
31090 Inst_DS__DS_MAX_I32::execute(GPUDynInstPtr gpuDynInst)
31091 {
31092 panicUnimplemented();
31093 }
31094
31095 Inst_DS__DS_MIN_U32::Inst_DS__DS_MIN_U32(InFmt_DS *iFmt)
31096 : Inst_DS(iFmt, "ds_min_u32")
31097 {
31098 } // Inst_DS__DS_MIN_U32
31099
31100 Inst_DS__DS_MIN_U32::~Inst_DS__DS_MIN_U32()
31101 {
31102 } // ~Inst_DS__DS_MIN_U32
31103
31104 // tmp = MEM[ADDR];
31105 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
31106 // RETURN_DATA = tmp.
31107 void
31108 Inst_DS__DS_MIN_U32::execute(GPUDynInstPtr gpuDynInst)
31109 {
31110 panicUnimplemented();
31111 }
31112
31113 Inst_DS__DS_MAX_U32::Inst_DS__DS_MAX_U32(InFmt_DS *iFmt)
31114 : Inst_DS(iFmt, "ds_max_u32")
31115 {
31116 } // Inst_DS__DS_MAX_U32
31117
31118 Inst_DS__DS_MAX_U32::~Inst_DS__DS_MAX_U32()
31119 {
31120 } // ~Inst_DS__DS_MAX_U32
31121
31122 // tmp = MEM[ADDR];
31123 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
31124 // RETURN_DATA = tmp.
31125 void
31126 Inst_DS__DS_MAX_U32::execute(GPUDynInstPtr gpuDynInst)
31127 {
31128 panicUnimplemented();
31129 }
31130
31131 Inst_DS__DS_AND_B32::Inst_DS__DS_AND_B32(InFmt_DS *iFmt)
31132 : Inst_DS(iFmt, "ds_and_b32")
31133 {
31134 } // Inst_DS__DS_AND_B32
31135
31136 Inst_DS__DS_AND_B32::~Inst_DS__DS_AND_B32()
31137 {
31138 } // ~Inst_DS__DS_AND_B32
31139
31140 // tmp = MEM[ADDR];
31141 // MEM[ADDR] &= DATA;
31142 // RETURN_DATA = tmp.
31143 void
31144 Inst_DS__DS_AND_B32::execute(GPUDynInstPtr gpuDynInst)
31145 {
31146 panicUnimplemented();
31147 }
31148
31149 Inst_DS__DS_OR_B32::Inst_DS__DS_OR_B32(InFmt_DS *iFmt)
31150 : Inst_DS(iFmt, "ds_or_b32")
31151 {
31152 } // Inst_DS__DS_OR_B32
31153
31154 Inst_DS__DS_OR_B32::~Inst_DS__DS_OR_B32()
31155 {
31156 } // ~Inst_DS__DS_OR_B32
31157
31158 // tmp = MEM[ADDR];
31159 // MEM[ADDR] |= DATA;
31160 // RETURN_DATA = tmp.
31161 void
31162 Inst_DS__DS_OR_B32::execute(GPUDynInstPtr gpuDynInst)
31163 {
31164 panicUnimplemented();
31165 }
31166
31167 Inst_DS__DS_XOR_B32::Inst_DS__DS_XOR_B32(InFmt_DS *iFmt)
31168 : Inst_DS(iFmt, "ds_xor_b32")
31169 {
31170 } // Inst_DS__DS_XOR_B32
31171
31172 Inst_DS__DS_XOR_B32::~Inst_DS__DS_XOR_B32()
31173 {
31174 } // ~Inst_DS__DS_XOR_B32
31175
31176 // tmp = MEM[ADDR];
31177 // MEM[ADDR] ^= DATA;
31178 // RETURN_DATA = tmp.
31179 void
31180 Inst_DS__DS_XOR_B32::execute(GPUDynInstPtr gpuDynInst)
31181 {
31182 panicUnimplemented();
31183 }
31184
31185 Inst_DS__DS_MSKOR_B32::Inst_DS__DS_MSKOR_B32(InFmt_DS *iFmt)
31186 : Inst_DS(iFmt, "ds_mskor_b32")
31187 {
31188 } // Inst_DS__DS_MSKOR_B32
31189
31190 Inst_DS__DS_MSKOR_B32::~Inst_DS__DS_MSKOR_B32()
31191 {
31192 } // ~Inst_DS__DS_MSKOR_B32
31193
31194 // tmp = MEM[ADDR];
// MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
31196 // RETURN_DATA = tmp.
31197 void
31198 Inst_DS__DS_MSKOR_B32::execute(GPUDynInstPtr gpuDynInst)
31199 {
31200 panicUnimplemented();
31201 }
31202
31203 Inst_DS__DS_WRITE_B32::Inst_DS__DS_WRITE_B32(InFmt_DS *iFmt)
31204 : Inst_DS(iFmt, "ds_write_b32")
31205 {
31206 setFlag(MemoryRef);
31207 setFlag(Store);
31208 } // Inst_DS__DS_WRITE_B32
31209
31210 Inst_DS__DS_WRITE_B32::~Inst_DS__DS_WRITE_B32()
31211 {
31212 } // ~Inst_DS__DS_WRITE_B32
31213
31214 // MEM[ADDR] = DATA.
31215 // Write dword.
31216 void
31217 Inst_DS__DS_WRITE_B32::execute(GPUDynInstPtr gpuDynInst)
31218 {
31219 Wavefront *wf = gpuDynInst->wavefront();
31220 gpuDynInst->execUnitId = wf->execUnitId;
31221 gpuDynInst->exec_mask = wf->execMask();
31222 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31223 gpuDynInst->latency.set(
31224 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31225 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31226 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
31227
31228 addr.read();
31229 data.read();
31230
31231 calcAddr(gpuDynInst, addr);
31232
31233 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31234 if (wf->execMask(lane)) {
31235 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
31236 = data[lane];
31237 }
31238 }
31239
31240 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31241
31242 wf->wrLmReqsInPipe--;
31243 wf->outstandingReqsWrLm++;
31244 wf->outstandingReqs++;
31245 wf->validateRequestCounters();
31246 }
31247
31248 void
31249 Inst_DS__DS_WRITE_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
31250 {
31251 Addr offset0 = instData.OFFSET0;
31252 Addr offset1 = instData.OFFSET1;
31253 Addr offset = (offset1 << 8) | offset0;
31254
31255 initMemWrite<VecElemU32>(gpuDynInst, offset);
31256 } // initiateAcc
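
// OFFSET0 and OFFSET1 are each 8 bits wide and concatenate into a
// 16-bit unsigned byte offset, e.g. OFFSET0 = 0x34 and OFFSET1 = 0x12
// give offset = 0x1234.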
31257
31258 void
31259 Inst_DS__DS_WRITE_B32::completeAcc(GPUDynInstPtr gpuDynInst)
31260 {
31261 } // completeAcc
31262
31263 Inst_DS__DS_WRITE2_B32::Inst_DS__DS_WRITE2_B32(InFmt_DS *iFmt)
31264 : Inst_DS(iFmt, "ds_write2_b32")
31265 {
31266 setFlag(MemoryRef);
31267 setFlag(Store);
31268 } // Inst_DS__DS_WRITE2_B32
31269
31270 Inst_DS__DS_WRITE2_B32::~Inst_DS__DS_WRITE2_B32()
31271 {
31272 } // ~Inst_DS__DS_WRITE2_B32
31273
31274 // MEM[ADDR_BASE + OFFSET0 * 4] = DATA;
31275 // MEM[ADDR_BASE + OFFSET1 * 4] = DATA2.
31276 // Write 2 dwords.
31277 void
31278 Inst_DS__DS_WRITE2_B32::execute(GPUDynInstPtr gpuDynInst)
31279 {
31280 Wavefront *wf = gpuDynInst->wavefront();
31281 gpuDynInst->execUnitId = wf->execUnitId;
31282 gpuDynInst->exec_mask = wf->execMask();
31283 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31284 gpuDynInst->latency.set(
31285 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31286 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31287 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
31288 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
31289
31290 addr.read();
31291 data0.read();
31292 data1.read();
31293
31294 calcAddr(gpuDynInst, addr);
31295
31296 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31297 if (wf->execMask(lane)) {
31298 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
31299 = data0[lane];
31300 (reinterpret_cast<VecElemU32*>(
31301 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
31302 }
31303 }
31304
31305 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31306
31307 wf->wrLmReqsInPipe--;
31308 wf->outstandingReqsWrLm++;
31309 wf->outstandingReqs++;
31310 wf->validateRequestCounters();
31311 }
31312
31313 void
31314 Inst_DS__DS_WRITE2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
31315 {
31316 Addr offset0 = instData.OFFSET0 * 4;
31317 Addr offset1 = instData.OFFSET1 * 4;
31318
31319 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
31320 }
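
// For the write2 forms each 8-bit offset is in dword units, so
// OFFSET0 = 0 and OFFSET1 = 1 store the two dwords at byte offsets 0
// and 4 (i.e., adjacent dwords).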
31321
31322 void
31323 Inst_DS__DS_WRITE2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
31324 {
31325 }
31326
31327 Inst_DS__DS_WRITE2ST64_B32::Inst_DS__DS_WRITE2ST64_B32(InFmt_DS *iFmt)
31328 : Inst_DS(iFmt, "ds_write2st64_b32")
31329 {
31330 setFlag(MemoryRef);
31331 setFlag(Store);
31332 } // Inst_DS__DS_WRITE2ST64_B32
31333
31334 Inst_DS__DS_WRITE2ST64_B32::~Inst_DS__DS_WRITE2ST64_B32()
31335 {
31336 } // ~Inst_DS__DS_WRITE2ST64_B32
31337
31338 // MEM[ADDR_BASE + OFFSET0 * 4 * 64] = DATA;
// MEM[ADDR_BASE + OFFSET1 * 4 * 64] = DATA2.
31340 // Write 2 dwords.
31341 void
31342 Inst_DS__DS_WRITE2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
31343 {
31344 Wavefront *wf = gpuDynInst->wavefront();
31345 gpuDynInst->execUnitId = wf->execUnitId;
31346 gpuDynInst->exec_mask = wf->execMask();
31347 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31348 gpuDynInst->latency.set(
31349 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31350 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31351 ConstVecOperandU32 data0(gpuDynInst, extData.DATA0);
31352 ConstVecOperandU32 data1(gpuDynInst, extData.DATA1);
31353
31354 addr.read();
31355 data0.read();
31356 data1.read();
31357
31358 calcAddr(gpuDynInst, addr);
31359
31360 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31361 if (wf->execMask(lane)) {
31362 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
31363 = data0[lane];
31364 (reinterpret_cast<VecElemU32*>(
31365 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
31366 }
31367 }
31368
31369 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31370
31371 wf->wrLmReqsInPipe--;
31372 wf->outstandingReqsWrLm++;
31373 wf->outstandingReqs++;
31374 wf->validateRequestCounters();
31375 } // execute
31376
31377 void
31378 Inst_DS__DS_WRITE2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
31379 {
31380 Addr offset0 = instData.OFFSET0 * 4 * 64;
31381 Addr offset1 = instData.OFFSET1 * 4 * 64;
31382
31383 initDualMemWrite<VecElemU32>(gpuDynInst, offset0, offset1);
31384 }
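
// The st64 variant strides in units of 64 dwords (256 bytes), so
// OFFSET1 = 1 places the second dword at byte offset 256.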
31385
31386 void
31387 Inst_DS__DS_WRITE2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
31388 {
31389 }
31390 // --- Inst_DS__DS_CMPST_B32 class methods ---
31391
31392 Inst_DS__DS_CMPST_B32::Inst_DS__DS_CMPST_B32(InFmt_DS *iFmt)
31393 : Inst_DS(iFmt, "ds_cmpst_b32")
31394 {
31395 } // Inst_DS__DS_CMPST_B32
31396
31397 Inst_DS__DS_CMPST_B32::~Inst_DS__DS_CMPST_B32()
31398 {
31399 } // ~Inst_DS__DS_CMPST_B32
31400
31401 // tmp = MEM[ADDR];
31402 // src = DATA2;
31403 // cmp = DATA;
31404 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31405 // RETURN_DATA[0] = tmp.
31406 // Compare and store.
31407 void
31408 Inst_DS__DS_CMPST_B32::execute(GPUDynInstPtr gpuDynInst)
31409 {
31410 panicUnimplemented();
31411 }
31412
31413 Inst_DS__DS_CMPST_F32::Inst_DS__DS_CMPST_F32(InFmt_DS *iFmt)
31414 : Inst_DS(iFmt, "ds_cmpst_f32")
31415 {
31416 setFlag(F32);
31417 } // Inst_DS__DS_CMPST_F32
31418
31419 Inst_DS__DS_CMPST_F32::~Inst_DS__DS_CMPST_F32()
31420 {
31421 } // ~Inst_DS__DS_CMPST_F32
31422
31423 // tmp = MEM[ADDR];
31424 // src = DATA2;
31425 // cmp = DATA;
31426 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31427 // RETURN_DATA[0] = tmp.
31428 void
31429 Inst_DS__DS_CMPST_F32::execute(GPUDynInstPtr gpuDynInst)
31430 {
31431 panicUnimplemented();
31432 }
31433
31434 Inst_DS__DS_MIN_F32::Inst_DS__DS_MIN_F32(InFmt_DS *iFmt)
31435 : Inst_DS(iFmt, "ds_min_f32")
31436 {
31437 setFlag(F32);
31438 } // Inst_DS__DS_MIN_F32
31439
31440 Inst_DS__DS_MIN_F32::~Inst_DS__DS_MIN_F32()
31441 {
31442 } // ~Inst_DS__DS_MIN_F32
31443
31444 // tmp = MEM[ADDR];
31445 // src = DATA;
31446 // cmp = DATA2;
31447 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
31448 void
31449 Inst_DS__DS_MIN_F32::execute(GPUDynInstPtr gpuDynInst)
31450 {
31451 panicUnimplemented();
31452 }
31453
31454 Inst_DS__DS_MAX_F32::Inst_DS__DS_MAX_F32(InFmt_DS *iFmt)
31455 : Inst_DS(iFmt, "ds_max_f32")
31456 {
31457 setFlag(F32);
31458 } // Inst_DS__DS_MAX_F32
31459
31460 Inst_DS__DS_MAX_F32::~Inst_DS__DS_MAX_F32()
31461 {
31462 } // ~Inst_DS__DS_MAX_F32
31463
31464 // tmp = MEM[ADDR];
31465 // src = DATA;
31466 // cmp = DATA2;
31467 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
31468 void
31469 Inst_DS__DS_MAX_F32::execute(GPUDynInstPtr gpuDynInst)
31470 {
31471 panicUnimplemented();
31472 }
31473
31474 Inst_DS__DS_NOP::Inst_DS__DS_NOP(InFmt_DS *iFmt)
31475 : Inst_DS(iFmt, "ds_nop")
31476 {
31477 setFlag(Nop);
31478 } // Inst_DS__DS_NOP
31479
31480 Inst_DS__DS_NOP::~Inst_DS__DS_NOP()
31481 {
31482 } // ~Inst_DS__DS_NOP
31483
31484 // Do nothing.
31485 void
31486 Inst_DS__DS_NOP::execute(GPUDynInstPtr gpuDynInst)
31487 {
31488 }
31489
31490 Inst_DS__DS_ADD_F32::Inst_DS__DS_ADD_F32(InFmt_DS *iFmt)
31491 : Inst_DS(iFmt, "ds_add_f32")
31492 {
31493 setFlag(F32);
31494 } // Inst_DS__DS_ADD_F32
31495
31496 Inst_DS__DS_ADD_F32::~Inst_DS__DS_ADD_F32()
31497 {
31498 } // ~Inst_DS__DS_ADD_F32
31499
31500 // tmp = MEM[ADDR];
31501 // MEM[ADDR] += DATA;
31502 // RETURN_DATA = tmp.
31503 void
31504 Inst_DS__DS_ADD_F32::execute(GPUDynInstPtr gpuDynInst)
31505 {
31506 panicUnimplemented();
31507 }
31508
31509 Inst_DS__DS_WRITE_B8::Inst_DS__DS_WRITE_B8(InFmt_DS *iFmt)
31510 : Inst_DS(iFmt, "ds_write_b8")
31511 {
31512 setFlag(MemoryRef);
31513 setFlag(Store);
31514 } // Inst_DS__DS_WRITE_B8
31515
31516 Inst_DS__DS_WRITE_B8::~Inst_DS__DS_WRITE_B8()
31517 {
31518 } // ~Inst_DS__DS_WRITE_B8
31519
31520 // MEM[ADDR] = DATA[7:0].
31521 void
31522 Inst_DS__DS_WRITE_B8::execute(GPUDynInstPtr gpuDynInst)
31523 {
31524 Wavefront *wf = gpuDynInst->wavefront();
31525 gpuDynInst->execUnitId = wf->execUnitId;
31526 gpuDynInst->exec_mask = wf->execMask();
31527 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31528 gpuDynInst->latency.set(
31529 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31530 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31531 ConstVecOperandU8 data(gpuDynInst, extData.DATA0);
31532
31533 addr.read();
31534 data.read();
31535
31536 calcAddr(gpuDynInst, addr);
31537
31538 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31539 if (wf->execMask(lane)) {
31540 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
31541 = data[lane];
31542 }
31543 }
31544
31545 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31546
31547 wf->wrLmReqsInPipe--;
31548 wf->outstandingReqsWrLm++;
31549 wf->outstandingReqs++;
31550 wf->validateRequestCounters();
31551 } // execute
31552
31553 void
31554 Inst_DS__DS_WRITE_B8::initiateAcc(GPUDynInstPtr gpuDynInst)
31555 {
31556 Addr offset0 = instData.OFFSET0;
31557 Addr offset1 = instData.OFFSET1;
31558 Addr offset = (offset1 << 8) | offset0;
31559
31560 initMemWrite<VecElemU8>(gpuDynInst, offset);
31561 } // initiateAcc
31562
31563 void
31564 Inst_DS__DS_WRITE_B8::completeAcc(GPUDynInstPtr gpuDynInst)
31565 {
31566 } // completeAcc
31567 // --- Inst_DS__DS_WRITE_B16 class methods ---
31568
31569 Inst_DS__DS_WRITE_B16::Inst_DS__DS_WRITE_B16(InFmt_DS *iFmt)
31570 : Inst_DS(iFmt, "ds_write_b16")
31571 {
31572 setFlag(MemoryRef);
31573 setFlag(Store);
31574 } // Inst_DS__DS_WRITE_B16
31575
31576 Inst_DS__DS_WRITE_B16::~Inst_DS__DS_WRITE_B16()
31577 {
31578 } // ~Inst_DS__DS_WRITE_B16
31579
31580 // MEM[ADDR] = DATA[15:0]
31581 void
31582 Inst_DS__DS_WRITE_B16::execute(GPUDynInstPtr gpuDynInst)
31583 {
31584 Wavefront *wf = gpuDynInst->wavefront();
31585 gpuDynInst->execUnitId = wf->execUnitId;
31586 gpuDynInst->exec_mask = wf->execMask();
31587 gpuDynInst->latency.init(gpuDynInst->computeUnit());
31588 gpuDynInst->latency.set(
31589 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
31590 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
31591 ConstVecOperandU16 data(gpuDynInst, extData.DATA0);
31592
31593 addr.read();
31594 data.read();
31595
31596 calcAddr(gpuDynInst, addr);
31597
31598 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
31599 if (wf->execMask(lane)) {
31600 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
31601 = data[lane];
31602 }
31603 }
31604
31605 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
31606
31607 wf->wrLmReqsInPipe--;
31608 wf->outstandingReqsWrLm++;
31609 wf->outstandingReqs++;
31610 wf->validateRequestCounters();
31611 } // execute
31612
31613 void
31614 Inst_DS__DS_WRITE_B16::initiateAcc(GPUDynInstPtr gpuDynInst)
31615 {
31616 Addr offset0 = instData.OFFSET0;
31617 Addr offset1 = instData.OFFSET1;
31618 Addr offset = (offset1 << 8) | offset0;
31619
31620 initMemWrite<VecElemU16>(gpuDynInst, offset);
31621 } // initiateAcc
31622
31623 void
31624 Inst_DS__DS_WRITE_B16::completeAcc(GPUDynInstPtr gpuDynInst)
31625 {
31626 } // completeAcc
31627 // --- Inst_DS__DS_ADD_RTN_U32 class methods ---
31628
31629 Inst_DS__DS_ADD_RTN_U32::Inst_DS__DS_ADD_RTN_U32(InFmt_DS *iFmt)
31630 : Inst_DS(iFmt, "ds_add_rtn_u32")
31631 {
31632 } // Inst_DS__DS_ADD_RTN_U32
31633
31634 Inst_DS__DS_ADD_RTN_U32::~Inst_DS__DS_ADD_RTN_U32()
31635 {
31636 } // ~Inst_DS__DS_ADD_RTN_U32
31637
31638 // tmp = MEM[ADDR];
31639 // MEM[ADDR] += DATA;
31640 // RETURN_DATA = tmp.
31641 void
31642 Inst_DS__DS_ADD_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31643 {
31644 panicUnimplemented();
31645 }
31646
31647 Inst_DS__DS_SUB_RTN_U32::Inst_DS__DS_SUB_RTN_U32(InFmt_DS *iFmt)
31648 : Inst_DS(iFmt, "ds_sub_rtn_u32")
31649 {
31650 } // Inst_DS__DS_SUB_RTN_U32
31651
31652 Inst_DS__DS_SUB_RTN_U32::~Inst_DS__DS_SUB_RTN_U32()
31653 {
31654 } // ~Inst_DS__DS_SUB_RTN_U32
31655
31656 // tmp = MEM[ADDR];
31657 // MEM[ADDR] -= DATA;
31658 // RETURN_DATA = tmp.
31659 void
31660 Inst_DS__DS_SUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31661 {
31662 panicUnimplemented();
31663 }
31664
31665 Inst_DS__DS_RSUB_RTN_U32::Inst_DS__DS_RSUB_RTN_U32(InFmt_DS *iFmt)
31666 : Inst_DS(iFmt, "ds_rsub_rtn_u32")
31667 {
31668 } // Inst_DS__DS_RSUB_RTN_U32
31669
31670 Inst_DS__DS_RSUB_RTN_U32::~Inst_DS__DS_RSUB_RTN_U32()
31671 {
31672 } // ~Inst_DS__DS_RSUB_RTN_U32
31673
31674 // tmp = MEM[ADDR];
31675 // MEM[ADDR] = DATA - MEM[ADDR];
31676 // RETURN_DATA = tmp.
31677 void
31678 Inst_DS__DS_RSUB_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31679 {
31680 panicUnimplemented();
31681 }
31682
31683 Inst_DS__DS_INC_RTN_U32::Inst_DS__DS_INC_RTN_U32(InFmt_DS *iFmt)
31684 : Inst_DS(iFmt, "ds_inc_rtn_u32")
31685 {
31686 } // Inst_DS__DS_INC_RTN_U32
31687
31688 Inst_DS__DS_INC_RTN_U32::~Inst_DS__DS_INC_RTN_U32()
31689 {
31690 } // ~Inst_DS__DS_INC_RTN_U32
31691
31692 // tmp = MEM[ADDR];
31693 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
31694 // RETURN_DATA = tmp.
31695 void
31696 Inst_DS__DS_INC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31697 {
31698 panicUnimplemented();
31699 }
31700
31701 Inst_DS__DS_DEC_RTN_U32::Inst_DS__DS_DEC_RTN_U32(InFmt_DS *iFmt)
31702 : Inst_DS(iFmt, "ds_dec_rtn_u32")
31703 {
31704 } // Inst_DS__DS_DEC_RTN_U32
31705
31706 Inst_DS__DS_DEC_RTN_U32::~Inst_DS__DS_DEC_RTN_U32()
31707 {
31708 } // ~Inst_DS__DS_DEC_RTN_U32
31709
31710 // tmp = MEM[ADDR];
31711 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
31712 // (unsigned compare); RETURN_DATA = tmp.
31713 void
31714 Inst_DS__DS_DEC_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31715 {
31716 panicUnimplemented();
31717 }
31718
31719 Inst_DS__DS_MIN_RTN_I32::Inst_DS__DS_MIN_RTN_I32(InFmt_DS *iFmt)
31720 : Inst_DS(iFmt, "ds_min_rtn_i32")
31721 {
31722 } // Inst_DS__DS_MIN_RTN_I32
31723
31724 Inst_DS__DS_MIN_RTN_I32::~Inst_DS__DS_MIN_RTN_I32()
31725 {
31726 } // ~Inst_DS__DS_MIN_RTN_I32
31727
31728 // tmp = MEM[ADDR];
31729 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
31730 // RETURN_DATA = tmp.
31731 void
31732 Inst_DS__DS_MIN_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
31733 {
31734 panicUnimplemented();
31735 }
31736
31737 Inst_DS__DS_MAX_RTN_I32::Inst_DS__DS_MAX_RTN_I32(InFmt_DS *iFmt)
31738 : Inst_DS(iFmt, "ds_max_rtn_i32")
31739 {
31740 } // Inst_DS__DS_MAX_RTN_I32
31741
31742 Inst_DS__DS_MAX_RTN_I32::~Inst_DS__DS_MAX_RTN_I32()
31743 {
31744 } // ~Inst_DS__DS_MAX_RTN_I32
31745
31746 // tmp = MEM[ADDR];
31747 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
31748 // RETURN_DATA = tmp.
31749 void
31750 Inst_DS__DS_MAX_RTN_I32::execute(GPUDynInstPtr gpuDynInst)
31751 {
31752 panicUnimplemented();
31753 }
31754
31755 Inst_DS__DS_MIN_RTN_U32::Inst_DS__DS_MIN_RTN_U32(InFmt_DS *iFmt)
31756 : Inst_DS(iFmt, "ds_min_rtn_u32")
31757 {
31758 } // Inst_DS__DS_MIN_RTN_U32
31759
31760 Inst_DS__DS_MIN_RTN_U32::~Inst_DS__DS_MIN_RTN_U32()
31761 {
31762 } // ~Inst_DS__DS_MIN_RTN_U32
31763
31764 // tmp = MEM[ADDR];
31765 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
31766 // RETURN_DATA = tmp.
31767 void
31768 Inst_DS__DS_MIN_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31769 {
31770 panicUnimplemented();
31771 }
31772
31773 Inst_DS__DS_MAX_RTN_U32::Inst_DS__DS_MAX_RTN_U32(InFmt_DS *iFmt)
31774 : Inst_DS(iFmt, "ds_max_rtn_u32")
31775 {
31776 } // Inst_DS__DS_MAX_RTN_U32
31777
31778 Inst_DS__DS_MAX_RTN_U32::~Inst_DS__DS_MAX_RTN_U32()
31779 {
31780 } // ~Inst_DS__DS_MAX_RTN_U32
31781
31782 // tmp = MEM[ADDR];
31783 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
31784 // RETURN_DATA = tmp.
31785 void
31786 Inst_DS__DS_MAX_RTN_U32::execute(GPUDynInstPtr gpuDynInst)
31787 {
31788 panicUnimplemented();
31789 }
31790
31791 Inst_DS__DS_AND_RTN_B32::Inst_DS__DS_AND_RTN_B32(InFmt_DS *iFmt)
31792 : Inst_DS(iFmt, "ds_and_rtn_b32")
31793 {
31794 } // Inst_DS__DS_AND_RTN_B32
31795
31796 Inst_DS__DS_AND_RTN_B32::~Inst_DS__DS_AND_RTN_B32()
31797 {
31798 } // ~Inst_DS__DS_AND_RTN_B32
31799
31800 // tmp = MEM[ADDR];
31801 // MEM[ADDR] &= DATA;
31802 // RETURN_DATA = tmp.
31803 void
31804 Inst_DS__DS_AND_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31805 {
31806 panicUnimplemented();
31807 }
31808
31809 Inst_DS__DS_OR_RTN_B32::Inst_DS__DS_OR_RTN_B32(InFmt_DS *iFmt)
31810 : Inst_DS(iFmt, "ds_or_rtn_b32")
31811 {
31812 } // Inst_DS__DS_OR_RTN_B32
31813
31814 Inst_DS__DS_OR_RTN_B32::~Inst_DS__DS_OR_RTN_B32()
31815 {
31816 } // ~Inst_DS__DS_OR_RTN_B32
31817
31818 // tmp = MEM[ADDR];
31819 // MEM[ADDR] |= DATA;
31820 // RETURN_DATA = tmp.
31821 void
31822 Inst_DS__DS_OR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31823 {
31824 panicUnimplemented();
31825 }
31826
31827 Inst_DS__DS_XOR_RTN_B32::Inst_DS__DS_XOR_RTN_B32(InFmt_DS *iFmt)
31828 : Inst_DS(iFmt, "ds_xor_rtn_b32")
31829 {
31830 } // Inst_DS__DS_XOR_RTN_B32
31831
31832 Inst_DS__DS_XOR_RTN_B32::~Inst_DS__DS_XOR_RTN_B32()
31833 {
31834 } // ~Inst_DS__DS_XOR_RTN_B32
31835
31836 // tmp = MEM[ADDR];
31837 // MEM[ADDR] ^= DATA;
31838 // RETURN_DATA = tmp.
31839 void
31840 Inst_DS__DS_XOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31841 {
31842 panicUnimplemented();
31843 }
31844
31845 Inst_DS__DS_MSKOR_RTN_B32::Inst_DS__DS_MSKOR_RTN_B32(InFmt_DS *iFmt)
31846 : Inst_DS(iFmt, "ds_mskor_rtn_b32")
31847 {
31848 } // Inst_DS__DS_MSKOR_RTN_B32
31849
31850 Inst_DS__DS_MSKOR_RTN_B32::~Inst_DS__DS_MSKOR_RTN_B32()
31851 {
31852 } // ~Inst_DS__DS_MSKOR_RTN_B32
31853
31854 // tmp = MEM[ADDR];
// MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
31856 // RETURN_DATA = tmp.
31857 void
31858 Inst_DS__DS_MSKOR_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31859 {
31860 panicUnimplemented();
31861 }
31862
31863 Inst_DS__DS_WRXCHG_RTN_B32::Inst_DS__DS_WRXCHG_RTN_B32(InFmt_DS *iFmt)
31864 : Inst_DS(iFmt, "ds_wrxchg_rtn_b32")
31865 {
31866 } // Inst_DS__DS_WRXCHG_RTN_B32
31867
31868 Inst_DS__DS_WRXCHG_RTN_B32::~Inst_DS__DS_WRXCHG_RTN_B32()
31869 {
31870 } // ~Inst_DS__DS_WRXCHG_RTN_B32
31871
31872 // tmp = MEM[ADDR];
31873 // MEM[ADDR] = DATA;
31874 // RETURN_DATA = tmp.
31875 // Write-exchange operation.
31876 void
31877 Inst_DS__DS_WRXCHG_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31878 {
31879 panicUnimplemented();
31880 }
31881
31882 Inst_DS__DS_WRXCHG2_RTN_B32::Inst_DS__DS_WRXCHG2_RTN_B32(InFmt_DS *iFmt)
31883 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b32")
31884 {
31885 } // Inst_DS__DS_WRXCHG2_RTN_B32
31886
31887 Inst_DS__DS_WRXCHG2_RTN_B32::~Inst_DS__DS_WRXCHG2_RTN_B32()
31888 {
31889 } // ~Inst_DS__DS_WRXCHG2_RTN_B32
31890
31891 // Write-exchange 2 separate dwords.
31892 void
31893 Inst_DS__DS_WRXCHG2_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31894 {
31895 panicUnimplemented();
31896 }
31897
31898 Inst_DS__DS_WRXCHG2ST64_RTN_B32::Inst_DS__DS_WRXCHG2ST64_RTN_B32(
31899 InFmt_DS *iFmt)
31900 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b32")
31901 {
31902 } // Inst_DS__DS_WRXCHG2ST64_RTN_B32
31903
31904 Inst_DS__DS_WRXCHG2ST64_RTN_B32::~Inst_DS__DS_WRXCHG2ST64_RTN_B32()
31905 {
31906 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B32
31907
31908 // Write-exchange 2 separate dwords with a stride of 64 dwords.
31909 void
31910 Inst_DS__DS_WRXCHG2ST64_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31911 {
31912 panicUnimplemented();
31913 }
31914
31915 Inst_DS__DS_CMPST_RTN_B32::Inst_DS__DS_CMPST_RTN_B32(InFmt_DS *iFmt)
31916 : Inst_DS(iFmt, "ds_cmpst_rtn_b32")
31917 {
31918 } // Inst_DS__DS_CMPST_RTN_B32
31919
31920 Inst_DS__DS_CMPST_RTN_B32::~Inst_DS__DS_CMPST_RTN_B32()
31921 {
31922 } // ~Inst_DS__DS_CMPST_RTN_B32
31923
31924 // tmp = MEM[ADDR];
31925 // src = DATA2;
31926 // cmp = DATA;
31927 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31928 // RETURN_DATA[0] = tmp.
31929 // Compare and store.
31930 void
31931 Inst_DS__DS_CMPST_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
31932 {
31933 panicUnimplemented();
31934 }
31935
31936 Inst_DS__DS_CMPST_RTN_F32::Inst_DS__DS_CMPST_RTN_F32(InFmt_DS *iFmt)
31937 : Inst_DS(iFmt, "ds_cmpst_rtn_f32")
31938 {
31939 setFlag(F32);
31940 } // Inst_DS__DS_CMPST_RTN_F32
31941
31942 Inst_DS__DS_CMPST_RTN_F32::~Inst_DS__DS_CMPST_RTN_F32()
31943 {
31944 } // ~Inst_DS__DS_CMPST_RTN_F32
31945
31946 // tmp = MEM[ADDR];
31947 // src = DATA2;
31948 // cmp = DATA;
31949 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
31950 // RETURN_DATA[0] = tmp.
31951 void
31952 Inst_DS__DS_CMPST_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
31953 {
31954 panicUnimplemented();
31955 }
31956
31957 Inst_DS__DS_MIN_RTN_F32::Inst_DS__DS_MIN_RTN_F32(InFmt_DS *iFmt)
31958 : Inst_DS(iFmt, "ds_min_rtn_f32")
31959 {
31960 setFlag(F32);
31961 } // Inst_DS__DS_MIN_RTN_F32
31962
31963 Inst_DS__DS_MIN_RTN_F32::~Inst_DS__DS_MIN_RTN_F32()
31964 {
31965 } // ~Inst_DS__DS_MIN_RTN_F32
31966
31967 // tmp = MEM[ADDR];
31968 // src = DATA;
31969 // cmp = DATA2;
31970 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
31971 void
31972 Inst_DS__DS_MIN_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
31973 {
31974 panicUnimplemented();
31975 }
31976
31977 Inst_DS__DS_MAX_RTN_F32::Inst_DS__DS_MAX_RTN_F32(InFmt_DS *iFmt)
31978 : Inst_DS(iFmt, "ds_max_rtn_f32")
31979 {
31980 setFlag(F32);
31981 } // Inst_DS__DS_MAX_RTN_F32
31982
31983 Inst_DS__DS_MAX_RTN_F32::~Inst_DS__DS_MAX_RTN_F32()
31984 {
31985 } // ~Inst_DS__DS_MAX_RTN_F32
31986
31987 // tmp = MEM[ADDR];
31988 // src = DATA;
31989 // cmp = DATA2;
31990 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
31991 void
31992 Inst_DS__DS_MAX_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
31993 {
31994 panicUnimplemented();
31995 }
31996
31997 Inst_DS__DS_WRAP_RTN_B32::Inst_DS__DS_WRAP_RTN_B32(InFmt_DS *iFmt)
31998 : Inst_DS(iFmt, "ds_wrap_rtn_b32")
31999 {
32000 } // Inst_DS__DS_WRAP_RTN_B32
32001
32002 Inst_DS__DS_WRAP_RTN_B32::~Inst_DS__DS_WRAP_RTN_B32()
32003 {
32004 } // ~Inst_DS__DS_WRAP_RTN_B32
32005
32006 // tmp = MEM[ADDR];
32007 // MEM[ADDR] = (tmp >= DATA) ? tmp - DATA : tmp + DATA2;
32008 // RETURN_DATA = tmp.
32009 void
32010 Inst_DS__DS_WRAP_RTN_B32::execute(GPUDynInstPtr gpuDynInst)
32011 {
32012 panicUnimplemented();
32013 }
32014
32015 Inst_DS__DS_ADD_RTN_F32::Inst_DS__DS_ADD_RTN_F32(InFmt_DS *iFmt)
32016 : Inst_DS(iFmt, "ds_add_rtn_f32")
32017 {
32018 setFlag(F32);
32019 } // Inst_DS__DS_ADD_RTN_F32
32020
32021 Inst_DS__DS_ADD_RTN_F32::~Inst_DS__DS_ADD_RTN_F32()
32022 {
32023 } // ~Inst_DS__DS_ADD_RTN_F32
32024
32025 // tmp = MEM[ADDR];
32026 // MEM[ADDR] += DATA;
32027 // RETURN_DATA = tmp.
32028 void
32029 Inst_DS__DS_ADD_RTN_F32::execute(GPUDynInstPtr gpuDynInst)
32030 {
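// Left as a no-op: unlike the other unimplemented DS atomics, this
// opcode does not call panicUnimplemented(), presumably so kernels
// that issue it can still run (the atomic add is dropped).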
32031 }
32032
32033 Inst_DS__DS_READ_B32::Inst_DS__DS_READ_B32(InFmt_DS *iFmt)
32034 : Inst_DS(iFmt, "ds_read_b32")
32035 {
32036 setFlag(MemoryRef);
32037 setFlag(Load);
32038 } // Inst_DS__DS_READ_B32
32039
32040 Inst_DS__DS_READ_B32::~Inst_DS__DS_READ_B32()
32041 {
32042 } // ~Inst_DS__DS_READ_B32
32043
32044 // RETURN_DATA = MEM[ADDR].
32045 // Dword read.
32046 void
32047 Inst_DS__DS_READ_B32::execute(GPUDynInstPtr gpuDynInst)
32048 {
32049 Wavefront *wf = gpuDynInst->wavefront();
32050 gpuDynInst->execUnitId = wf->execUnitId;
32051 gpuDynInst->exec_mask = wf->execMask();
32052 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32053 gpuDynInst->latency.set(
32054 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32055 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32056
32057 addr.read();
32058
32059 calcAddr(gpuDynInst, addr);
32060
32061 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32062
32063 wf->rdLmReqsInPipe--;
32064 wf->outstandingReqsRdLm++;
32065 wf->outstandingReqs++;
32066 wf->validateRequestCounters();
32067 }
32068
32069 void
32070 Inst_DS__DS_READ_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
32071 {
32072 Addr offset0 = instData.OFFSET0;
32073 Addr offset1 = instData.OFFSET1;
32074 Addr offset = (offset1 << 8) | offset0;
32075
32076 initMemRead<VecElemU32>(gpuDynInst, offset);
32077 } // initiateAcc
32078
32079 void
32080 Inst_DS__DS_READ_B32::completeAcc(GPUDynInstPtr gpuDynInst)
32081 {
32082 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32083
32084 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32085 if (gpuDynInst->exec_mask[lane]) {
32086 vdst[lane] = (reinterpret_cast<VecElemU32*>(
32087 gpuDynInst->d_data))[lane];
32088 }
32089 }
32090
32091 vdst.write();
32092 } // completeAcc
32093
32094 Inst_DS__DS_READ2_B32::Inst_DS__DS_READ2_B32(InFmt_DS *iFmt)
32095 : Inst_DS(iFmt, "ds_read2_b32")
32096 {
32097 setFlag(MemoryRef);
32098 setFlag(Load);
32099 } // Inst_DS__DS_READ2_B32
32100
32101 Inst_DS__DS_READ2_B32::~Inst_DS__DS_READ2_B32()
32102 {
32103 } // ~Inst_DS__DS_READ2_B32
32104
32105 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4];
32106 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4].
32107 // Read 2 dwords.
32108 void
32109 Inst_DS__DS_READ2_B32::execute(GPUDynInstPtr gpuDynInst)
32110 {
32111 Wavefront *wf = gpuDynInst->wavefront();
32112 gpuDynInst->execUnitId = wf->execUnitId;
32113 gpuDynInst->exec_mask = wf->execMask();
32114 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32115 gpuDynInst->latency.set(
32116 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32117 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32118
32119 addr.read();
32120
32121 calcAddr(gpuDynInst, addr);
32122
32123 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32124
32125 wf->rdLmReqsInPipe--;
32126 wf->outstandingReqsRdLm++;
32127 wf->outstandingReqs++;
32128 wf->validateRequestCounters();
32129 }
32130
32131 void
32132 Inst_DS__DS_READ2_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
32133 {
32134 Addr offset0 = instData.OFFSET0 * 4;
32135 Addr offset1 = instData.OFFSET1 * 4;
32136
32137 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
32138 } // initiateAcc
32139
32140 void
32141 Inst_DS__DS_READ2_B32::completeAcc(GPUDynInstPtr gpuDynInst)
32142 {
32143 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
32144 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
32145
32146 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32147 if (gpuDynInst->exec_mask[lane]) {
32148 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
32149 gpuDynInst->d_data))[lane * 2];
32150 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
32151 gpuDynInst->d_data))[lane * 2 + 1];
32152 }
32153 }
32154
32155 vdst0.write();
32156 vdst1.write();
32157 } // completeAcc
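
    /**
     * Illustrative sketch, not part of the simulator: dual reads return
     * their data in d_data with the two dwords of each lane interleaved,
     * so lane N's results sit at indices 2N and 2N + 1, as unpacked in
     * completeAcc() above. The helper name is hypothetical.
     */
    static inline void
    unpackDualReadSketch(const VecElemU32 *d_data, int lane,
                         VecElemU32 &first, VecElemU32 &second)
    {
        first = d_data[lane * 2];      // value read at ADDR + OFFSET0 * 4
        second = d_data[lane * 2 + 1]; // value read at ADDR + OFFSET1 * 4
    }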
32158
32159 Inst_DS__DS_READ2ST64_B32::Inst_DS__DS_READ2ST64_B32(InFmt_DS *iFmt)
32160 : Inst_DS(iFmt, "ds_read2st64_b32")
32161 {
32162 setFlag(MemoryRef);
32163 setFlag(Load);
32164 } // Inst_DS__DS_READ2ST64_B32
32165
32166 Inst_DS__DS_READ2ST64_B32::~Inst_DS__DS_READ2ST64_B32()
32167 {
32168 } // ~Inst_DS__DS_READ2ST64_B32
32169
32170 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 4 * 64];
32171 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 4 * 64].
32172 // Read 2 dwords.
32173 void
32174 Inst_DS__DS_READ2ST64_B32::execute(GPUDynInstPtr gpuDynInst)
32175 {
32176 Wavefront *wf = gpuDynInst->wavefront();
32177 gpuDynInst->execUnitId = wf->execUnitId;
32178 gpuDynInst->exec_mask = wf->execMask();
32179 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32180 gpuDynInst->latency.set(
32181 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32182 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32183
32184 addr.read();
32185
32186 calcAddr(gpuDynInst, addr);
32187
32188 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32189
32190 wf->rdLmReqsInPipe--;
32191 wf->outstandingReqsRdLm++;
32192 wf->outstandingReqs++;
32193 wf->validateRequestCounters();
32194 } // execute
32195
32196 void
32197 Inst_DS__DS_READ2ST64_B32::initiateAcc(GPUDynInstPtr gpuDynInst)
32198 {
32199 Addr offset0 = (instData.OFFSET0 * 4 * 64);
32200 Addr offset1 = (instData.OFFSET1 * 4 * 64);
32201
32202 initDualMemRead<VecElemU32>(gpuDynInst, offset0, offset1);
32203 }
32204
32205 void
32206 Inst_DS__DS_READ2ST64_B32::completeAcc(GPUDynInstPtr gpuDynInst)
32207 {
32208 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
32209 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
32210
32211 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32212 if (gpuDynInst->exec_mask[lane]) {
32213                 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
32214                     gpuDynInst->d_data))[lane * 2];
32215                 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
32216                     gpuDynInst->d_data))[lane * 2 + 1];
32217 }
32218 }
32219
32220 vdst0.write();
32221 vdst1.write();
32222 }
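
    /**
     * Illustrative sketch, not part of the simulator: the ST64 variants
     * scale each offset field by 64 elements rather than one, so for a
     * dword access OFFSET0 = 1 addresses ADDR_BASE + 256 bytes. The
     * helper name is hypothetical.
     */
    static inline Addr
    st64DwordByteOffsetSketch(Addr offsetField)
    {
        return offsetField * 4 * 64; // dword size * 64-element stride
    }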
32223 // --- Inst_DS__DS_READ_I8 class methods ---
32224
32225 Inst_DS__DS_READ_I8::Inst_DS__DS_READ_I8(InFmt_DS *iFmt)
32226 : Inst_DS(iFmt, "ds_read_i8")
32227 {
32228 setFlag(MemoryRef);
32229 setFlag(Load);
32230 } // Inst_DS__DS_READ_I8
32231
32232 Inst_DS__DS_READ_I8::~Inst_DS__DS_READ_I8()
32233 {
32234 } // ~Inst_DS__DS_READ_I8
32235
32236 // RETURN_DATA = signext(MEM[ADDR][7:0]).
32237 // Signed byte read.
32238 void
32239 Inst_DS__DS_READ_I8::execute(GPUDynInstPtr gpuDynInst)
32240 {
32241 panicUnimplemented();
32242 }
32243
32244 Inst_DS__DS_READ_U8::Inst_DS__DS_READ_U8(InFmt_DS *iFmt)
32245 : Inst_DS(iFmt, "ds_read_u8")
32246 {
32247 setFlag(MemoryRef);
32248 setFlag(Load);
32249 } // Inst_DS__DS_READ_U8
32250
32251 Inst_DS__DS_READ_U8::~Inst_DS__DS_READ_U8()
32252 {
32253 } // ~Inst_DS__DS_READ_U8
32254
32255 // RETURN_DATA = {24'h0,MEM[ADDR][7:0]}.
32256 // Unsigned byte read.
32257 void
32258 Inst_DS__DS_READ_U8::execute(GPUDynInstPtr gpuDynInst)
32259 {
32260 Wavefront *wf = gpuDynInst->wavefront();
32261 gpuDynInst->execUnitId = wf->execUnitId;
32262 gpuDynInst->exec_mask = wf->execMask();
32263 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32264 gpuDynInst->latency.set(
32265 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32266 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32267
32268 addr.read();
32269
32270 calcAddr(gpuDynInst, addr);
32271
32272 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32273
32274 wf->rdLmReqsInPipe--;
32275 wf->outstandingReqsRdLm++;
32276 wf->outstandingReqs++;
32277 wf->validateRequestCounters();
32278 } // execute
32279
32280 void
32281 Inst_DS__DS_READ_U8::initiateAcc(GPUDynInstPtr gpuDynInst)
32282 {
32283 Addr offset0 = instData.OFFSET0;
32284 Addr offset1 = instData.OFFSET1;
32285 Addr offset = (offset1 << 8) | offset0;
32286
32287 initMemRead<VecElemU8>(gpuDynInst, offset);
32288 } // initiateAcc
32289
32290 void
32291 Inst_DS__DS_READ_U8::completeAcc(GPUDynInstPtr gpuDynInst)
32292 {
32293 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32294
32295 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32296 if (gpuDynInst->exec_mask[lane]) {
32297 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU8*>(
32298 gpuDynInst->d_data))[lane];
32299 }
32300 }
32301
32302 vdst.write();
32303 } // completeAcc
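
    /**
     * Illustrative sketch, not part of the simulator: sub-dword reads
     * like ds_read_u8 zero-extend into the 32-bit destination register,
     * matching the {24'h0,MEM[ADDR][7:0]} form in the comment above. The
     * helper name is hypothetical.
     */
    static inline VecElemU32
    zeroExtendByteSketch(VecElemU8 byte)
    {
        return (VecElemU32)byte; // high 24 bits become zero
    }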
32304 // --- Inst_DS__DS_READ_I16 class methods ---
32305
32306 Inst_DS__DS_READ_I16::Inst_DS__DS_READ_I16(InFmt_DS *iFmt)
32307 : Inst_DS(iFmt, "ds_read_i16")
32308 {
32309 setFlag(MemoryRef);
32310 setFlag(Load);
32311 } // Inst_DS__DS_READ_I16
32312
32313 Inst_DS__DS_READ_I16::~Inst_DS__DS_READ_I16()
32314 {
32315 } // ~Inst_DS__DS_READ_I16
32316
32317 // RETURN_DATA = signext(MEM[ADDR][15:0]).
32318 // Signed short read.
32319 void
32320 Inst_DS__DS_READ_I16::execute(GPUDynInstPtr gpuDynInst)
32321 {
32322 panicUnimplemented();
32323 }
32324
32325 Inst_DS__DS_READ_U16::Inst_DS__DS_READ_U16(InFmt_DS *iFmt)
32326 : Inst_DS(iFmt, "ds_read_u16")
32327 {
32328 setFlag(MemoryRef);
32329 setFlag(Load);
32330 } // Inst_DS__DS_READ_U16
32331
32332 Inst_DS__DS_READ_U16::~Inst_DS__DS_READ_U16()
32333 {
32334 } // ~Inst_DS__DS_READ_U16
32335
32336 // RETURN_DATA = {16'h0,MEM[ADDR][15:0]}.
32337 // Unsigned short read.
32338 void
32339 Inst_DS__DS_READ_U16::execute(GPUDynInstPtr gpuDynInst)
32340 {
32341 Wavefront *wf = gpuDynInst->wavefront();
32342 gpuDynInst->execUnitId = wf->execUnitId;
32343 gpuDynInst->exec_mask = wf->execMask();
32344 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32345 gpuDynInst->latency.set(
32346 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32347 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32348
32349 addr.read();
32350
32351 calcAddr(gpuDynInst, addr);
32352
32353 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32354
32355 wf->rdLmReqsInPipe--;
32356 wf->outstandingReqsRdLm++;
32357 wf->outstandingReqs++;
32358 wf->validateRequestCounters();
32359         } // execute

32360         void
32361 Inst_DS__DS_READ_U16::initiateAcc(GPUDynInstPtr gpuDynInst)
32362 {
32363 Addr offset0 = instData.OFFSET0;
32364 Addr offset1 = instData.OFFSET1;
32365 Addr offset = (offset1 << 8) | offset0;
32366
32367 initMemRead<VecElemU16>(gpuDynInst, offset);
32368 } // initiateAcc
32369
32370 void
32371 Inst_DS__DS_READ_U16::completeAcc(GPUDynInstPtr gpuDynInst)
32372 {
32373 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32374
32375 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32376 if (gpuDynInst->exec_mask[lane]) {
32377 vdst[lane] = (VecElemU32)(reinterpret_cast<VecElemU16*>(
32378 gpuDynInst->d_data))[lane];
32379 }
32380 }
32381
32382 vdst.write();
32383 } // completeAcc
32384 // --- Inst_DS__DS_SWIZZLE_B32 class methods ---
32385
32386 Inst_DS__DS_SWIZZLE_B32::Inst_DS__DS_SWIZZLE_B32(InFmt_DS *iFmt)
32387 : Inst_DS(iFmt, "ds_swizzle_b32")
32388 {
32389 setFlag(Load);
32390 } // Inst_DS__DS_SWIZZLE_B32
32391
32392 Inst_DS__DS_SWIZZLE_B32::~Inst_DS__DS_SWIZZLE_B32()
32393 {
32394 } // ~Inst_DS__DS_SWIZZLE_B32
32395
32396 // RETURN_DATA = swizzle(vgpr_data, offset1:offset0).
32397         // Dword swizzle; no data is written to LDS memory.
32398 void
32399 Inst_DS__DS_SWIZZLE_B32::execute(GPUDynInstPtr gpuDynInst)
32400 {
32401 Wavefront *wf = gpuDynInst->wavefront();
32402 wf->rdLmReqsInPipe--;
32403 wf->validateRequestCounters();
32404
32405 if (gpuDynInst->exec_mask.none()) {
32406 return;
32407 }
32408
32409 gpuDynInst->execUnitId = wf->execUnitId;
32410 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32411 gpuDynInst->latency.set(gpuDynInst->computeUnit()
32412 ->cyclesToTicks(Cycles(24)));
32413
32414 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
32415 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32416 /**
32417              * The "DS pattern" comprises both offset fields and encodes the
32418              * swizzle pattern between lanes. Bit 15 of the DS pattern selects
32419              * the swizzle mode. There are two different swizzle modes:
32420              * 1) QDMode and 2) Bit-masks mode. If bit 15 is set, QDMode is
32421              * used; otherwise Bit-masks mode is used. The remaining bits
32422              * dictate how to swizzle the lanes.
32423 *
32424 * QDMode: Chunks the lanes into 4s and swizzles among them.
32425 * Bits 7:6 dictate where lane 3 (of the current chunk)
32426              * gets its data, bits 5:4 lane 2, and so on.
32427 *
32428 * Bit-mask: This mode breaks bits 14:0 into 3 equal-sized chunks.
32429 * 14:10 is the xor_mask, 9:5 is the or_mask, and 4:0
32430 * is the and_mask. Each lane is swizzled by performing
32431 * the appropriate operation using these masks.
32432 */
32433 VecElemU16 ds_pattern = ((instData.OFFSET1 << 8) | instData.OFFSET0);
32434
32435 data.read();
32436
32437 if (bits(ds_pattern, 15)) {
32438 // QDMode
32439 for (int lane = 0; lane < NumVecElemPerVecReg; lane += 4) {
32440 /**
32441 * This operation allows data sharing between groups
32442 * of four consecutive threads. Note the increment by
32443 * 4 in the for loop.
32444 */
32445 if (gpuDynInst->exec_mask[lane]) {
32446 int index0 = lane + bits(ds_pattern, 1, 0);
32447 panic_if(index0 >= NumVecElemPerVecReg, "%s: index0 (%d) "
32448 "is out of bounds.\n", gpuDynInst->disassemble(),
32449 index0);
32450 vdst[lane]
32451 = gpuDynInst->exec_mask[index0] ? data[index0]: 0;
32452 }
32453 if (gpuDynInst->exec_mask[lane + 1]) {
32454 int index1 = lane + bits(ds_pattern, 3, 2);
32455 panic_if(index1 >= NumVecElemPerVecReg, "%s: index1 (%d) "
32456 "is out of bounds.\n", gpuDynInst->disassemble(),
32457 index1);
32458 vdst[lane + 1]
32459 = gpuDynInst->exec_mask[index1] ? data[index1]: 0;
32460 }
32461 if (gpuDynInst->exec_mask[lane + 2]) {
32462 int index2 = lane + bits(ds_pattern, 5, 4);
32463 panic_if(index2 >= NumVecElemPerVecReg, "%s: index2 (%d) "
32464 "is out of bounds.\n", gpuDynInst->disassemble(),
32465 index2);
32466 vdst[lane + 2]
32467 = gpuDynInst->exec_mask[index2] ? data[index2]: 0;
32468 }
32469 if (gpuDynInst->exec_mask[lane + 3]) {
32470 int index3 = lane + bits(ds_pattern, 7, 6);
32471 panic_if(index3 >= NumVecElemPerVecReg, "%s: index3 (%d) "
32472 "is out of bounds.\n", gpuDynInst->disassemble(),
32473 index3);
32474 vdst[lane + 3]
32475 = gpuDynInst->exec_mask[index3] ? data[index3]: 0;
32476 }
32477 }
32478 } else {
32479 // Bit Mode
32480 int and_mask = bits(ds_pattern, 4, 0);
32481 int or_mask = bits(ds_pattern, 9, 5);
32482 int xor_mask = bits(ds_pattern, 14, 10);
32483 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32484 if (gpuDynInst->exec_mask[lane]) {
32485 int index = (((lane & and_mask) | or_mask) ^ xor_mask);
32486 // Adjust for the next 32 lanes.
32487 if (lane > 31) {
32488 index += 32;
32489 }
32490 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is "
32491 "out of bounds.\n", gpuDynInst->disassemble(),
32492 index);
32493 vdst[lane]
32494 = gpuDynInst->exec_mask[index] ? data[index] : 0;
32495 }
32496 }
32497 }
32498
32499 vdst.write();
32500 } // execute
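
    /**
     * Illustrative sketch, not part of the simulator: the source lane
     * that Bit-masks mode selects for a destination lane, using the
     * three 5-bit mask fields decoded in execute() above. The helper
     * name is hypothetical.
     */
    static inline int
    swizzleBitMaskLaneSketch(int lane, int and_mask, int or_mask,
                             int xor_mask)
    {
        int index = ((lane & and_mask) | or_mask) ^ xor_mask;
        // The 5-bit masks permute within a group of 32 lanes, so the
        // upper half of a 64-lane wavefront stays in the upper half.
        return (lane > 31) ? index + 32 : index;
    }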
32501 // --- Inst_DS__DS_PERMUTE_B32 class methods ---
32502
32503 Inst_DS__DS_PERMUTE_B32::Inst_DS__DS_PERMUTE_B32(InFmt_DS *iFmt)
32504 : Inst_DS(iFmt, "ds_permute_b32")
32505 {
32506 setFlag(MemoryRef);
32507 /**
32508 * While this operation doesn't actually use DS storage we classify
32509 * it as a load here because it does a writeback to a VGPR, which
32510 * fits in better with the LDS pipeline logic.
32511 */
32512 setFlag(Load);
32513 } // Inst_DS__DS_PERMUTE_B32
32514
32515 Inst_DS__DS_PERMUTE_B32::~Inst_DS__DS_PERMUTE_B32()
32516 {
32517 } // ~Inst_DS__DS_PERMUTE_B32
32518
32519 // Forward permute.
32520 void
32521 Inst_DS__DS_PERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
32522 {
32523 Wavefront *wf = gpuDynInst->wavefront();
32524 gpuDynInst->execUnitId = wf->execUnitId;
32525 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32526 gpuDynInst->latency.set(gpuDynInst->computeUnit()
32527 ->cyclesToTicks(Cycles(24)));
32528 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32529 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
32530 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32531
32532 addr.read();
32533 data.read();
32534
32535 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32536 if (wf->execMask(lane)) {
32537 /**
32538 * One of the offset fields can be used for the index.
32539 * It is assumed OFFSET0 would be used, as OFFSET1 is
32540 * typically only used for DS ops that operate on two
32541 * disparate pieces of data.
32542 */
32543 assert(!instData.OFFSET1);
32544 /**
32545 * The address provided is a byte address, but VGPRs are
32546 * 4 bytes, so we must divide by 4 to get the actual VGPR
32547 * index. Additionally, the index is calculated modulo the
32548 * WF size, 64 in this case, so we simply extract bits 7-2.
32549 */
32550 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
32551 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
32552 "of bounds.\n", gpuDynInst->disassemble(), index);
32553 /**
32554 * If the shuffled index corresponds to a lane that is
32555 * inactive then this instruction writes a 0 to the active
32556 * lane in VDST.
32557 */
32558 if (wf->execMask(index)) {
32559 vdst[index] = data[lane];
32560 } else {
32561 vdst[index] = 0;
32562 }
32563 }
32564 }
32565
32566 vdst.write();
32567
32568 wf->decLGKMInstsIssued();
32569 wf->rdLmReqsInPipe--;
32570 wf->validateRequestCounters();
32571 } // execute
32572 // --- Inst_DS__DS_BPERMUTE_B32 class methods ---
32573
32574 Inst_DS__DS_BPERMUTE_B32::Inst_DS__DS_BPERMUTE_B32(InFmt_DS *iFmt)
32575 : Inst_DS(iFmt, "ds_bpermute_b32")
32576 {
32577 setFlag(MemoryRef);
32578 /**
32579 * While this operation doesn't actually use DS storage we classify
32580 * it as a load here because it does a writeback to a VGPR, which
32581 * fits in better with the LDS pipeline logic.
32582 */
32583 setFlag(Load);
32584 } // Inst_DS__DS_BPERMUTE_B32
32585
32586 Inst_DS__DS_BPERMUTE_B32::~Inst_DS__DS_BPERMUTE_B32()
32587 {
32588 } // ~Inst_DS__DS_BPERMUTE_B32
32589
32590 // Backward permute.
32591 void
32592 Inst_DS__DS_BPERMUTE_B32::execute(GPUDynInstPtr gpuDynInst)
32593 {
32594 Wavefront *wf = gpuDynInst->wavefront();
32595 gpuDynInst->execUnitId = wf->execUnitId;
32596 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32597 gpuDynInst->latency.set(gpuDynInst->computeUnit()
32598 ->cyclesToTicks(Cycles(24)));
32599 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32600 ConstVecOperandU32 data(gpuDynInst, extData.DATA0);
32601 VecOperandU32 vdst(gpuDynInst, extData.VDST);
32602
32603 addr.read();
32604 data.read();
32605
32606 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32607 if (wf->execMask(lane)) {
32608 /**
32609 * One of the offset fields can be used for the index.
32610 * It is assumed OFFSET0 would be used, as OFFSET1 is
32611 * typically only used for DS ops that operate on two
32612 * disparate pieces of data.
32613 */
32614 assert(!instData.OFFSET1);
32615 /**
32616 * The address provided is a byte address, but VGPRs are
32617 * 4 bytes, so we must divide by 4 to get the actual VGPR
32618 * index. Additionally, the index is calculated modulo the
32619 * WF size, 64 in this case, so we simply extract bits 7-2.
32620 */
32621 int index = bits(addr[lane] + instData.OFFSET0, 7, 2);
32622 panic_if(index >= NumVecElemPerVecReg, "%s: index (%d) is out "
32623 "of bounds.\n", gpuDynInst->disassemble(), index);
32624 /**
32625 * If the shuffled index corresponds to a lane that is
32626 * inactive then this instruction writes a 0 to the active
32627 * lane in VDST.
32628 */
32629 if (wf->execMask(index)) {
32630 vdst[lane] = data[index];
32631 } else {
32632 vdst[lane] = 0;
32633 }
32634 }
32635 }
32636
32637 vdst.write();
32638
32639 wf->decLGKMInstsIssued();
32640 wf->rdLmReqsInPipe--;
32641 wf->validateRequestCounters();
32642 } // execute
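
    /**
     * Illustrative sketch, not part of the simulator: the direction of
     * the two permutes above. ds_permute_b32 scatters (the computed
     * index says where this lane's data goes), while ds_bpermute_b32
     * gathers (the index says where this lane's data comes from). The
     * helper names are hypothetical.
     */
    static inline void
    forwardPermuteSketch(const VecElemU32 *data, VecElemU32 *vdst,
                         int lane, int index)
    {
        vdst[index] = data[lane]; // scatter: write to the indexed lane
    }

    static inline void
    backwardPermuteSketch(const VecElemU32 *data, VecElemU32 *vdst,
                          int lane, int index)
    {
        vdst[lane] = data[index]; // gather: read from the indexed lane
    }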
32643
32644 // --- Inst_DS__DS_ADD_U64 class methods ---
32645
32646 Inst_DS__DS_ADD_U64::Inst_DS__DS_ADD_U64(InFmt_DS *iFmt)
32647 : Inst_DS(iFmt, "ds_add_u64")
32648 {
32649 } // Inst_DS__DS_ADD_U64
32650
32651 Inst_DS__DS_ADD_U64::~Inst_DS__DS_ADD_U64()
32652 {
32653 } // ~Inst_DS__DS_ADD_U64
32654
32655 // tmp = MEM[ADDR];
32656 // MEM[ADDR] += DATA[0:1];
32657 // RETURN_DATA[0:1] = tmp.
32658 void
32659 Inst_DS__DS_ADD_U64::execute(GPUDynInstPtr gpuDynInst)
32660 {
32661 panicUnimplemented();
32662 }
32663
32664 Inst_DS__DS_SUB_U64::Inst_DS__DS_SUB_U64(InFmt_DS *iFmt)
32665 : Inst_DS(iFmt, "ds_sub_u64")
32666 {
32667 } // Inst_DS__DS_SUB_U64
32668
32669 Inst_DS__DS_SUB_U64::~Inst_DS__DS_SUB_U64()
32670 {
32671 } // ~Inst_DS__DS_SUB_U64
32672
32673 // tmp = MEM[ADDR];
32674 // MEM[ADDR] -= DATA[0:1];
32675 // RETURN_DATA[0:1] = tmp.
32676 void
32677 Inst_DS__DS_SUB_U64::execute(GPUDynInstPtr gpuDynInst)
32678 {
32679 panicUnimplemented();
32680 }
32681
32682 Inst_DS__DS_RSUB_U64::Inst_DS__DS_RSUB_U64(InFmt_DS *iFmt)
32683 : Inst_DS(iFmt, "ds_rsub_u64")
32684 {
32685 } // Inst_DS__DS_RSUB_U64
32686
32687 Inst_DS__DS_RSUB_U64::~Inst_DS__DS_RSUB_U64()
32688 {
32689 } // ~Inst_DS__DS_RSUB_U64
32690
32691 // tmp = MEM[ADDR];
32692 // MEM[ADDR] = DATA - MEM[ADDR];
32693 // RETURN_DATA = tmp.
32694 // Subtraction with reversed operands.
32695 void
32696 Inst_DS__DS_RSUB_U64::execute(GPUDynInstPtr gpuDynInst)
32697 {
32698 panicUnimplemented();
32699 }
32700
32701 Inst_DS__DS_INC_U64::Inst_DS__DS_INC_U64(InFmt_DS *iFmt)
32702 : Inst_DS(iFmt, "ds_inc_u64")
32703 {
32704 } // Inst_DS__DS_INC_U64
32705
32706 Inst_DS__DS_INC_U64::~Inst_DS__DS_INC_U64()
32707 {
32708 } // ~Inst_DS__DS_INC_U64
32709
32710 // tmp = MEM[ADDR];
32711 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
32712 // RETURN_DATA[0:1] = tmp.
32713 void
32714 Inst_DS__DS_INC_U64::execute(GPUDynInstPtr gpuDynInst)
32715 {
32716 panicUnimplemented();
32717 }
32718
32719 Inst_DS__DS_DEC_U64::Inst_DS__DS_DEC_U64(InFmt_DS *iFmt)
32720 : Inst_DS(iFmt, "ds_dec_u64")
32721 {
32722 } // Inst_DS__DS_DEC_U64
32723
32724 Inst_DS__DS_DEC_U64::~Inst_DS__DS_DEC_U64()
32725 {
32726 } // ~Inst_DS__DS_DEC_U64
32727
32728 // tmp = MEM[ADDR];
32729 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
32730 // (unsigned compare);
32731 // RETURN_DATA[0:1] = tmp.
32732 void
32733 Inst_DS__DS_DEC_U64::execute(GPUDynInstPtr gpuDynInst)
32734 {
32735 panicUnimplemented();
32736 }
32737
32738 Inst_DS__DS_MIN_I64::Inst_DS__DS_MIN_I64(InFmt_DS *iFmt)
32739 : Inst_DS(iFmt, "ds_min_i64")
32740 {
32741 } // Inst_DS__DS_MIN_I64
32742
32743 Inst_DS__DS_MIN_I64::~Inst_DS__DS_MIN_I64()
32744 {
32745 } // ~Inst_DS__DS_MIN_I64
32746
32747 // tmp = MEM[ADDR];
32748         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
32749 // RETURN_DATA[0:1] = tmp.
32750 void
32751 Inst_DS__DS_MIN_I64::execute(GPUDynInstPtr gpuDynInst)
32752 {
32753 panicUnimplemented();
32754 }
32755
32756 Inst_DS__DS_MAX_I64::Inst_DS__DS_MAX_I64(InFmt_DS *iFmt)
32757 : Inst_DS(iFmt, "ds_max_i64")
32758 {
32759 } // Inst_DS__DS_MAX_I64
32760
32761 Inst_DS__DS_MAX_I64::~Inst_DS__DS_MAX_I64()
32762 {
32763 } // ~Inst_DS__DS_MAX_I64
32764
32765 // tmp = MEM[ADDR];
32766         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
32767 // RETURN_DATA[0:1] = tmp.
32768 void
32769 Inst_DS__DS_MAX_I64::execute(GPUDynInstPtr gpuDynInst)
32770 {
32771 panicUnimplemented();
32772 }
32773
32774 Inst_DS__DS_MIN_U64::Inst_DS__DS_MIN_U64(InFmt_DS *iFmt)
32775 : Inst_DS(iFmt, "ds_min_u64")
32776 {
32777 } // Inst_DS__DS_MIN_U64
32778
32779 Inst_DS__DS_MIN_U64::~Inst_DS__DS_MIN_U64()
32780 {
32781 } // ~Inst_DS__DS_MIN_U64
32782
32783 // tmp = MEM[ADDR];
32784         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
32785 // RETURN_DATA[0:1] = tmp.
32786 void
32787 Inst_DS__DS_MIN_U64::execute(GPUDynInstPtr gpuDynInst)
32788 {
32789 panicUnimplemented();
32790 }
32791
32792 Inst_DS__DS_MAX_U64::Inst_DS__DS_MAX_U64(InFmt_DS *iFmt)
32793 : Inst_DS(iFmt, "ds_max_u64")
32794 {
32795 } // Inst_DS__DS_MAX_U64
32796
32797 Inst_DS__DS_MAX_U64::~Inst_DS__DS_MAX_U64()
32798 {
32799 } // ~Inst_DS__DS_MAX_U64
32800
32801 // tmp = MEM[ADDR];
32802         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
32803 // RETURN_DATA[0:1] = tmp.
32804 void
32805 Inst_DS__DS_MAX_U64::execute(GPUDynInstPtr gpuDynInst)
32806 {
32807 panicUnimplemented();
32808 }
32809
32810 Inst_DS__DS_AND_B64::Inst_DS__DS_AND_B64(InFmt_DS *iFmt)
32811 : Inst_DS(iFmt, "ds_and_b64")
32812 {
32813 } // Inst_DS__DS_AND_B64
32814
32815 Inst_DS__DS_AND_B64::~Inst_DS__DS_AND_B64()
32816 {
32817 } // ~Inst_DS__DS_AND_B64
32818
32819 // tmp = MEM[ADDR];
32820 // MEM[ADDR] &= DATA[0:1];
32821 // RETURN_DATA[0:1] = tmp.
32822 void
32823 Inst_DS__DS_AND_B64::execute(GPUDynInstPtr gpuDynInst)
32824 {
32825 panicUnimplemented();
32826 }
32827
32828 Inst_DS__DS_OR_B64::Inst_DS__DS_OR_B64(InFmt_DS *iFmt)
32829 : Inst_DS(iFmt, "ds_or_b64")
32830 {
32831 } // Inst_DS__DS_OR_B64
32832
32833 Inst_DS__DS_OR_B64::~Inst_DS__DS_OR_B64()
32834 {
32835 } // ~Inst_DS__DS_OR_B64
32836
32837 // tmp = MEM[ADDR];
32838 // MEM[ADDR] |= DATA[0:1];
32839 // RETURN_DATA[0:1] = tmp.
32840 void
32841 Inst_DS__DS_OR_B64::execute(GPUDynInstPtr gpuDynInst)
32842 {
32843 panicUnimplemented();
32844 }
32845
32846 Inst_DS__DS_XOR_B64::Inst_DS__DS_XOR_B64(InFmt_DS *iFmt)
32847 : Inst_DS(iFmt, "ds_xor_b64")
32848 {
32849 } // Inst_DS__DS_XOR_B64
32850
32851 Inst_DS__DS_XOR_B64::~Inst_DS__DS_XOR_B64()
32852 {
32853 } // ~Inst_DS__DS_XOR_B64
32854
32855 // tmp = MEM[ADDR];
32856 // MEM[ADDR] ^= DATA[0:1];
32857 // RETURN_DATA[0:1] = tmp.
32858 void
32859 Inst_DS__DS_XOR_B64::execute(GPUDynInstPtr gpuDynInst)
32860 {
32861 panicUnimplemented();
32862 }
32863
32864 Inst_DS__DS_MSKOR_B64::Inst_DS__DS_MSKOR_B64(InFmt_DS *iFmt)
32865 : Inst_DS(iFmt, "ds_mskor_b64")
32866 {
32867 } // Inst_DS__DS_MSKOR_B64
32868
32869 Inst_DS__DS_MSKOR_B64::~Inst_DS__DS_MSKOR_B64()
32870 {
32871 } // ~Inst_DS__DS_MSKOR_B64
32872
32873 // tmp = MEM[ADDR];
32874         // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
32875 // RETURN_DATA = tmp.
32876 void
32877 Inst_DS__DS_MSKOR_B64::execute(GPUDynInstPtr gpuDynInst)
32878 {
32879 panicUnimplemented();
32880 }
32881
32882 Inst_DS__DS_WRITE_B64::Inst_DS__DS_WRITE_B64(InFmt_DS *iFmt)
32883 : Inst_DS(iFmt, "ds_write_b64")
32884 {
32885 setFlag(MemoryRef);
32886 setFlag(Store);
32887 } // Inst_DS__DS_WRITE_B64
32888
32889 Inst_DS__DS_WRITE_B64::~Inst_DS__DS_WRITE_B64()
32890 {
32891 } // ~Inst_DS__DS_WRITE_B64
32892
32893 // MEM[ADDR] = DATA.
32894 // Write qword.
32895 void
32896 Inst_DS__DS_WRITE_B64::execute(GPUDynInstPtr gpuDynInst)
32897 {
32898 Wavefront *wf = gpuDynInst->wavefront();
32899 gpuDynInst->execUnitId = wf->execUnitId;
32900 gpuDynInst->exec_mask = wf->execMask();
32901 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32902 gpuDynInst->latency.set(
32903 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32904 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32905 ConstVecOperandU64 data(gpuDynInst, extData.DATA0);
32906
32907 addr.read();
32908 data.read();
32909
32910 calcAddr(gpuDynInst, addr);
32911
32912 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32913 if (wf->execMask(lane)) {
32914 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
32915 = data[lane];
32916 }
32917 }
32918
32919 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32920
32921 wf->wrLmReqsInPipe--;
32922 wf->outstandingReqsWrLm++;
32923 wf->outstandingReqs++;
32924 wf->validateRequestCounters();
32925 }
32926
32927 void
32928 Inst_DS__DS_WRITE_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
32929 {
32930 Addr offset0 = instData.OFFSET0;
32931 Addr offset1 = instData.OFFSET1;
32932 Addr offset = (offset1 << 8) | offset0;
32933
32934 initMemWrite<VecElemU64>(gpuDynInst, offset);
32935 } // initiateAcc
32936
32937 void
32938 Inst_DS__DS_WRITE_B64::completeAcc(GPUDynInstPtr gpuDynInst)
32939 {
32940 } // completeAcc
32941
32942 Inst_DS__DS_WRITE2_B64::Inst_DS__DS_WRITE2_B64(InFmt_DS *iFmt)
32943 : Inst_DS(iFmt, "ds_write2_b64")
32944 {
32945 setFlag(MemoryRef);
32946 setFlag(Store);
32947 } // Inst_DS__DS_WRITE2_B64
32948
32949 Inst_DS__DS_WRITE2_B64::~Inst_DS__DS_WRITE2_B64()
32950 {
32951 } // ~Inst_DS__DS_WRITE2_B64
32952
32953 // MEM[ADDR_BASE + OFFSET0 * 8] = DATA;
32954 // MEM[ADDR_BASE + OFFSET1 * 8] = DATA2.
32955 // Write 2 qwords.
32956 void
32957 Inst_DS__DS_WRITE2_B64::execute(GPUDynInstPtr gpuDynInst)
32958 {
32959 Wavefront *wf = gpuDynInst->wavefront();
32960 gpuDynInst->execUnitId = wf->execUnitId;
32961 gpuDynInst->exec_mask = wf->execMask();
32962 gpuDynInst->latency.init(gpuDynInst->computeUnit());
32963 gpuDynInst->latency.set(
32964 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
32965 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
32966 ConstVecOperandU64 data0(gpuDynInst, extData.DATA0);
32967 ConstVecOperandU64 data1(gpuDynInst, extData.DATA1);
32968
32969 addr.read();
32970 data0.read();
32971 data1.read();
32972
32973 calcAddr(gpuDynInst, addr);
32974
32975 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
32976 if (wf->execMask(lane)) {
32977 (reinterpret_cast<VecElemU64*>(
32978 gpuDynInst->d_data))[lane * 2] = data0[lane];
32979 (reinterpret_cast<VecElemU64*>(
32980 gpuDynInst->d_data))[lane * 2 + 1] = data1[lane];
32981 }
32982 }
32983
32984 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
32985
32986 wf->wrLmReqsInPipe--;
32987 wf->outstandingReqsWrLm++;
32988 wf->outstandingReqs++;
32989 wf->validateRequestCounters();
32990 }
32991
32992 void
32993 Inst_DS__DS_WRITE2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
32994 {
32995 Addr offset0 = instData.OFFSET0 * 8;
32996 Addr offset1 = instData.OFFSET1 * 8;
32997
32998 initDualMemWrite<VecElemU64>(gpuDynInst, offset0, offset1);
32999 }
33000
33001 void
33002 Inst_DS__DS_WRITE2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33003 {
33004 }
33005
33006 Inst_DS__DS_WRITE2ST64_B64::Inst_DS__DS_WRITE2ST64_B64(InFmt_DS *iFmt)
33007 : Inst_DS(iFmt, "ds_write2st64_b64")
33008 {
33009 setFlag(MemoryRef);
33010 setFlag(Store);
33011 } // Inst_DS__DS_WRITE2ST64_B64
33012
33013 Inst_DS__DS_WRITE2ST64_B64::~Inst_DS__DS_WRITE2ST64_B64()
33014 {
33015 } // ~Inst_DS__DS_WRITE2ST64_B64
33016
33017 // MEM[ADDR_BASE + OFFSET0 * 8 * 64] = DATA;
33018         // MEM[ADDR_BASE + OFFSET1 * 8 * 64] = DATA2.
33019 // Write 2 qwords.
33020 void
33021 Inst_DS__DS_WRITE2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
33022 {
33023 panicUnimplemented();
33024 }
33025
33026 Inst_DS__DS_CMPST_B64::Inst_DS__DS_CMPST_B64(InFmt_DS *iFmt)
33027 : Inst_DS(iFmt, "ds_cmpst_b64")
33028 {
33029 } // Inst_DS__DS_CMPST_B64
33030
33031 Inst_DS__DS_CMPST_B64::~Inst_DS__DS_CMPST_B64()
33032 {
33033 } // ~Inst_DS__DS_CMPST_B64
33034
33035 // tmp = MEM[ADDR];
33036 // src = DATA2;
33037 // cmp = DATA;
33038 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33039 // RETURN_DATA[0] = tmp.
33040 // Compare and store.
33041 void
33042 Inst_DS__DS_CMPST_B64::execute(GPUDynInstPtr gpuDynInst)
33043 {
33044 panicUnimplemented();
33045 }
33046
33047 Inst_DS__DS_CMPST_F64::Inst_DS__DS_CMPST_F64(InFmt_DS *iFmt)
33048 : Inst_DS(iFmt, "ds_cmpst_f64")
33049 {
33050 setFlag(F64);
33051 } // Inst_DS__DS_CMPST_F64
33052
33053 Inst_DS__DS_CMPST_F64::~Inst_DS__DS_CMPST_F64()
33054 {
33055 } // ~Inst_DS__DS_CMPST_F64
33056
33057 // tmp = MEM[ADDR];
33058 // src = DATA2;
33059 // cmp = DATA;
33060 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33061 // RETURN_DATA[0] = tmp.
33062 void
33063 Inst_DS__DS_CMPST_F64::execute(GPUDynInstPtr gpuDynInst)
33064 {
33065 panicUnimplemented();
33066 }
33067
33068 Inst_DS__DS_MIN_F64::Inst_DS__DS_MIN_F64(InFmt_DS *iFmt)
33069 : Inst_DS(iFmt, "ds_min_f64")
33070 {
33071 setFlag(F64);
33072 } // Inst_DS__DS_MIN_F64
33073
33074 Inst_DS__DS_MIN_F64::~Inst_DS__DS_MIN_F64()
33075 {
33076 } // ~Inst_DS__DS_MIN_F64
33077
33078 // tmp = MEM[ADDR];
33079 // src = DATA;
33080 // cmp = DATA2;
33081 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
33082 void
33083 Inst_DS__DS_MIN_F64::execute(GPUDynInstPtr gpuDynInst)
33084 {
33085 panicUnimplemented();
33086 }
33087
33088 Inst_DS__DS_MAX_F64::Inst_DS__DS_MAX_F64(InFmt_DS *iFmt)
33089 : Inst_DS(iFmt, "ds_max_f64")
33090 {
33091 setFlag(F64);
33092 } // Inst_DS__DS_MAX_F64
33093
33094 Inst_DS__DS_MAX_F64::~Inst_DS__DS_MAX_F64()
33095 {
33096 } // ~Inst_DS__DS_MAX_F64
33097
33098 // tmp = MEM[ADDR];
33099 // src = DATA;
33100 // cmp = DATA2;
33101 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
33102 void
33103 Inst_DS__DS_MAX_F64::execute(GPUDynInstPtr gpuDynInst)
33104 {
33105 panicUnimplemented();
33106 }
33107
33108 Inst_DS__DS_ADD_RTN_U64::Inst_DS__DS_ADD_RTN_U64(InFmt_DS *iFmt)
33109 : Inst_DS(iFmt, "ds_add_rtn_u64")
33110 {
33111 } // Inst_DS__DS_ADD_RTN_U64
33112
33113 Inst_DS__DS_ADD_RTN_U64::~Inst_DS__DS_ADD_RTN_U64()
33114 {
33115 } // ~Inst_DS__DS_ADD_RTN_U64
33116
33117 // tmp = MEM[ADDR];
33118 // MEM[ADDR] += DATA[0:1];
33119 // RETURN_DATA[0:1] = tmp.
33120 void
33121 Inst_DS__DS_ADD_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33122 {
33123 panicUnimplemented();
33124 }
33125
33126 Inst_DS__DS_SUB_RTN_U64::Inst_DS__DS_SUB_RTN_U64(InFmt_DS *iFmt)
33127 : Inst_DS(iFmt, "ds_sub_rtn_u64")
33128 {
33129 } // Inst_DS__DS_SUB_RTN_U64
33130
33131 Inst_DS__DS_SUB_RTN_U64::~Inst_DS__DS_SUB_RTN_U64()
33132 {
33133 } // ~Inst_DS__DS_SUB_RTN_U64
33134
33135 // tmp = MEM[ADDR];
33136 // MEM[ADDR] -= DATA[0:1];
33137 // RETURN_DATA[0:1] = tmp.
33138 void
33139 Inst_DS__DS_SUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33140 {
33141 panicUnimplemented();
33142 }
33143
33144 Inst_DS__DS_RSUB_RTN_U64::Inst_DS__DS_RSUB_RTN_U64(InFmt_DS *iFmt)
33145 : Inst_DS(iFmt, "ds_rsub_rtn_u64")
33146 {
33147 } // Inst_DS__DS_RSUB_RTN_U64
33148
33149 Inst_DS__DS_RSUB_RTN_U64::~Inst_DS__DS_RSUB_RTN_U64()
33150 {
33151 } // ~Inst_DS__DS_RSUB_RTN_U64
33152
33153 // tmp = MEM[ADDR];
33154 // MEM[ADDR] = DATA - MEM[ADDR];
33155 // RETURN_DATA = tmp.
33156 // Subtraction with reversed operands.
33157 void
33158 Inst_DS__DS_RSUB_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33159 {
33160 panicUnimplemented();
33161 }
33162
33163 Inst_DS__DS_INC_RTN_U64::Inst_DS__DS_INC_RTN_U64(InFmt_DS *iFmt)
33164 : Inst_DS(iFmt, "ds_inc_rtn_u64")
33165 {
33166 } // Inst_DS__DS_INC_RTN_U64
33167
33168 Inst_DS__DS_INC_RTN_U64::~Inst_DS__DS_INC_RTN_U64()
33169 {
33170 } // ~Inst_DS__DS_INC_RTN_U64
33171
33172 // tmp = MEM[ADDR];
33173 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
33174 // RETURN_DATA[0:1] = tmp.
33175 void
33176 Inst_DS__DS_INC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33177 {
33178 panicUnimplemented();
33179 }
33180
33181 Inst_DS__DS_DEC_RTN_U64::Inst_DS__DS_DEC_RTN_U64(InFmt_DS *iFmt)
33182 : Inst_DS(iFmt, "ds_dec_rtn_u64")
33183 {
33184 } // Inst_DS__DS_DEC_RTN_U64
33185
33186 Inst_DS__DS_DEC_RTN_U64::~Inst_DS__DS_DEC_RTN_U64()
33187 {
33188 } // ~Inst_DS__DS_DEC_RTN_U64
33189
33190 // tmp = MEM[ADDR];
33191 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
33192 // (unsigned compare);
33193 // RETURN_DATA[0:1] = tmp.
33194 void
33195 Inst_DS__DS_DEC_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33196 {
33197 panicUnimplemented();
33198 }
33199
33200 Inst_DS__DS_MIN_RTN_I64::Inst_DS__DS_MIN_RTN_I64(InFmt_DS *iFmt)
33201 : Inst_DS(iFmt, "ds_min_rtn_i64")
33202 {
33203 } // Inst_DS__DS_MIN_RTN_I64
33204
33205 Inst_DS__DS_MIN_RTN_I64::~Inst_DS__DS_MIN_RTN_I64()
33206 {
33207 } // ~Inst_DS__DS_MIN_RTN_I64
33208
33209 // tmp = MEM[ADDR];
33210         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
33211 // RETURN_DATA[0:1] = tmp.
33212 void
33213 Inst_DS__DS_MIN_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
33214 {
33215 panicUnimplemented();
33216 }
33217
33218 Inst_DS__DS_MAX_RTN_I64::Inst_DS__DS_MAX_RTN_I64(InFmt_DS *iFmt)
33219 : Inst_DS(iFmt, "ds_max_rtn_i64")
33220 {
33221 } // Inst_DS__DS_MAX_RTN_I64
33222
33223 Inst_DS__DS_MAX_RTN_I64::~Inst_DS__DS_MAX_RTN_I64()
33224 {
33225 } // ~Inst_DS__DS_MAX_RTN_I64
33226
33227 // tmp = MEM[ADDR];
33228         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
33229 // RETURN_DATA[0:1] = tmp.
33230 void
33231 Inst_DS__DS_MAX_RTN_I64::execute(GPUDynInstPtr gpuDynInst)
33232 {
33233 panicUnimplemented();
33234 }
33235
33236 Inst_DS__DS_MIN_RTN_U64::Inst_DS__DS_MIN_RTN_U64(InFmt_DS *iFmt)
33237 : Inst_DS(iFmt, "ds_min_rtn_u64")
33238 {
33239 } // Inst_DS__DS_MIN_RTN_U64
33240
33241 Inst_DS__DS_MIN_RTN_U64::~Inst_DS__DS_MIN_RTN_U64()
33242 {
33243 } // ~Inst_DS__DS_MIN_RTN_U64
33244
33245 // tmp = MEM[ADDR];
33246         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
33247 // RETURN_DATA[0:1] = tmp.
33248 void
33249 Inst_DS__DS_MIN_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33250 {
33251 panicUnimplemented();
33252 }
33253
33254 Inst_DS__DS_MAX_RTN_U64::Inst_DS__DS_MAX_RTN_U64(InFmt_DS *iFmt)
33255 : Inst_DS(iFmt, "ds_max_rtn_u64")
33256 {
33257 } // Inst_DS__DS_MAX_RTN_U64
33258
33259 Inst_DS__DS_MAX_RTN_U64::~Inst_DS__DS_MAX_RTN_U64()
33260 {
33261 } // ~Inst_DS__DS_MAX_RTN_U64
33262
33263 // tmp = MEM[ADDR];
33264         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
33265 // RETURN_DATA[0:1] = tmp.
33266 void
33267 Inst_DS__DS_MAX_RTN_U64::execute(GPUDynInstPtr gpuDynInst)
33268 {
33269 panicUnimplemented();
33270 }
33271
33272 Inst_DS__DS_AND_RTN_B64::Inst_DS__DS_AND_RTN_B64(InFmt_DS *iFmt)
33273 : Inst_DS(iFmt, "ds_and_rtn_b64")
33274 {
33275 } // Inst_DS__DS_AND_RTN_B64
33276
33277 Inst_DS__DS_AND_RTN_B64::~Inst_DS__DS_AND_RTN_B64()
33278 {
33279 } // ~Inst_DS__DS_AND_RTN_B64
33280
33281 // tmp = MEM[ADDR];
33282 // MEM[ADDR] &= DATA[0:1];
33283 // RETURN_DATA[0:1] = tmp.
33284 void
33285 Inst_DS__DS_AND_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33286 {
33287 panicUnimplemented();
33288 }
33289
33290 Inst_DS__DS_OR_RTN_B64::Inst_DS__DS_OR_RTN_B64(InFmt_DS *iFmt)
33291 : Inst_DS(iFmt, "ds_or_rtn_b64")
33292 {
33293 } // Inst_DS__DS_OR_RTN_B64
33294
33295 Inst_DS__DS_OR_RTN_B64::~Inst_DS__DS_OR_RTN_B64()
33296 {
33297 } // ~Inst_DS__DS_OR_RTN_B64
33298
33299 // tmp = MEM[ADDR];
33300 // MEM[ADDR] |= DATA[0:1];
33301 // RETURN_DATA[0:1] = tmp.
33302 void
33303 Inst_DS__DS_OR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33304 {
33305 panicUnimplemented();
33306 }
33307
33308 Inst_DS__DS_XOR_RTN_B64::Inst_DS__DS_XOR_RTN_B64(InFmt_DS *iFmt)
33309 : Inst_DS(iFmt, "ds_xor_rtn_b64")
33310 {
33311 } // Inst_DS__DS_XOR_RTN_B64
33312
33313 Inst_DS__DS_XOR_RTN_B64::~Inst_DS__DS_XOR_RTN_B64()
33314 {
33315 } // ~Inst_DS__DS_XOR_RTN_B64
33316
33317 // tmp = MEM[ADDR];
33318 // MEM[ADDR] ^= DATA[0:1];
33319 // RETURN_DATA[0:1] = tmp.
33320 void
33321 Inst_DS__DS_XOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33322 {
33323 panicUnimplemented();
33324 }
33325
33326 Inst_DS__DS_MSKOR_RTN_B64::Inst_DS__DS_MSKOR_RTN_B64(InFmt_DS *iFmt)
33327 : Inst_DS(iFmt, "ds_mskor_rtn_b64")
33328 {
33329 } // Inst_DS__DS_MSKOR_RTN_B64
33330
33331 Inst_DS__DS_MSKOR_RTN_B64::~Inst_DS__DS_MSKOR_RTN_B64()
33332 {
33333 } // ~Inst_DS__DS_MSKOR_RTN_B64
33334
33335 // tmp = MEM[ADDR];
33336         // MEM[ADDR] = (MEM[ADDR] & ~DATA) | DATA2;
33337         // RETURN_DATA = tmp.
33338         // Masked qword OR; D0 contains the mask and D1 contains the new value.
33339 void
33340 Inst_DS__DS_MSKOR_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33341 {
33342 panicUnimplemented();
33343 }
33344
33345 Inst_DS__DS_WRXCHG_RTN_B64::Inst_DS__DS_WRXCHG_RTN_B64(InFmt_DS *iFmt)
33346 : Inst_DS(iFmt, "ds_wrxchg_rtn_b64")
33347 {
33348 } // Inst_DS__DS_WRXCHG_RTN_B64
33349
33350 Inst_DS__DS_WRXCHG_RTN_B64::~Inst_DS__DS_WRXCHG_RTN_B64()
33351 {
33352 } // ~Inst_DS__DS_WRXCHG_RTN_B64
33353
33354 // tmp = MEM[ADDR];
33355 // MEM[ADDR] = DATA;
33356 // RETURN_DATA = tmp.
33357 // Write-exchange operation.
33358 void
33359 Inst_DS__DS_WRXCHG_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33360 {
33361 panicUnimplemented();
33362 }
33363
33364 Inst_DS__DS_WRXCHG2_RTN_B64::Inst_DS__DS_WRXCHG2_RTN_B64(InFmt_DS *iFmt)
33365 : Inst_DS(iFmt, "ds_wrxchg2_rtn_b64")
33366 {
33367 } // Inst_DS__DS_WRXCHG2_RTN_B64
33368
33369 Inst_DS__DS_WRXCHG2_RTN_B64::~Inst_DS__DS_WRXCHG2_RTN_B64()
33370 {
33371 } // ~Inst_DS__DS_WRXCHG2_RTN_B64
33372
33373 // Write-exchange 2 separate qwords.
33374 void
33375 Inst_DS__DS_WRXCHG2_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33376 {
33377 panicUnimplemented();
33378 }
33379
33380 Inst_DS__DS_WRXCHG2ST64_RTN_B64::Inst_DS__DS_WRXCHG2ST64_RTN_B64(
33381 InFmt_DS *iFmt)
33382 : Inst_DS(iFmt, "ds_wrxchg2st64_rtn_b64")
33383 {
33384 } // Inst_DS__DS_WRXCHG2ST64_RTN_B64
33385
33386 Inst_DS__DS_WRXCHG2ST64_RTN_B64::~Inst_DS__DS_WRXCHG2ST64_RTN_B64()
33387 {
33388 } // ~Inst_DS__DS_WRXCHG2ST64_RTN_B64
33389
33390 // Write-exchange 2 qwords with a stride of 64 qwords.
33391 void
33392 Inst_DS__DS_WRXCHG2ST64_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33393 {
33394 panicUnimplemented();
33395 }
33396
33397 Inst_DS__DS_CMPST_RTN_B64::Inst_DS__DS_CMPST_RTN_B64(InFmt_DS *iFmt)
33398 : Inst_DS(iFmt, "ds_cmpst_rtn_b64")
33399 {
33400 } // Inst_DS__DS_CMPST_RTN_B64
33401
33402 Inst_DS__DS_CMPST_RTN_B64::~Inst_DS__DS_CMPST_RTN_B64()
33403 {
33404 } // ~Inst_DS__DS_CMPST_RTN_B64
33405
33406 // tmp = MEM[ADDR];
33407 // src = DATA2;
33408 // cmp = DATA;
33409 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33410 // RETURN_DATA[0] = tmp.
33411 // Compare and store.
33412 void
33413 Inst_DS__DS_CMPST_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33414 {
33415 panicUnimplemented();
33416 }
33417
33418 Inst_DS__DS_CMPST_RTN_F64::Inst_DS__DS_CMPST_RTN_F64(InFmt_DS *iFmt)
33419 : Inst_DS(iFmt, "ds_cmpst_rtn_f64")
33420 {
33421 setFlag(F64);
33422 } // Inst_DS__DS_CMPST_RTN_F64
33423
33424 Inst_DS__DS_CMPST_RTN_F64::~Inst_DS__DS_CMPST_RTN_F64()
33425 {
33426 } // ~Inst_DS__DS_CMPST_RTN_F64
33427
33428 // tmp = MEM[ADDR];
33429 // src = DATA2;
33430 // cmp = DATA;
33431 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
33432 // RETURN_DATA[0] = tmp.
33433 void
33434 Inst_DS__DS_CMPST_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
33435 {
33436 panicUnimplemented();
33437 }
33438
33439 Inst_DS__DS_MIN_RTN_F64::Inst_DS__DS_MIN_RTN_F64(InFmt_DS *iFmt)
33440 : Inst_DS(iFmt, "ds_min_rtn_f64")
33441 {
33442 setFlag(F64);
33443 } // Inst_DS__DS_MIN_RTN_F64
33444
33445 Inst_DS__DS_MIN_RTN_F64::~Inst_DS__DS_MIN_RTN_F64()
33446 {
33447 } // ~Inst_DS__DS_MIN_RTN_F64
33448
33449 // tmp = MEM[ADDR];
33450 // src = DATA;
33451 // cmp = DATA2;
33452 // MEM[ADDR] = (cmp < tmp) ? src : tmp.
33453 void
33454 Inst_DS__DS_MIN_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
33455 {
33456 panicUnimplemented();
33457 }
33458
33459 Inst_DS__DS_MAX_RTN_F64::Inst_DS__DS_MAX_RTN_F64(InFmt_DS *iFmt)
33460 : Inst_DS(iFmt, "ds_max_rtn_f64")
33461 {
33462 setFlag(F64);
33463 } // Inst_DS__DS_MAX_RTN_F64
33464
33465 Inst_DS__DS_MAX_RTN_F64::~Inst_DS__DS_MAX_RTN_F64()
33466 {
33467 } // ~Inst_DS__DS_MAX_RTN_F64
33468
33469 // tmp = MEM[ADDR];
33470 // src = DATA;
33471 // cmp = DATA2;
33472 // MEM[ADDR] = (tmp > cmp) ? src : tmp.
33473 void
33474 Inst_DS__DS_MAX_RTN_F64::execute(GPUDynInstPtr gpuDynInst)
33475 {
33476 panicUnimplemented();
33477 }
33478
33479 Inst_DS__DS_READ_B64::Inst_DS__DS_READ_B64(InFmt_DS *iFmt)
33480 : Inst_DS(iFmt, "ds_read_b64")
33481 {
33482 setFlag(MemoryRef);
33483 setFlag(Load);
33484 } // Inst_DS__DS_READ_B64
33485
33486 Inst_DS__DS_READ_B64::~Inst_DS__DS_READ_B64()
33487 {
33488 } // ~Inst_DS__DS_READ_B64
33489
33490 // RETURN_DATA = MEM[ADDR].
33491 // Read 1 qword.
33492 void
33493 Inst_DS__DS_READ_B64::execute(GPUDynInstPtr gpuDynInst)
33494 {
33495 Wavefront *wf = gpuDynInst->wavefront();
33496 gpuDynInst->execUnitId = wf->execUnitId;
33497 gpuDynInst->exec_mask = wf->execMask();
33498 gpuDynInst->latency.init(gpuDynInst->computeUnit());
33499 gpuDynInst->latency.set(
33500 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
33501 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
33502
33503 addr.read();
33504
33505 calcAddr(gpuDynInst, addr);
33506
33507 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
33508
33509 wf->rdLmReqsInPipe--;
33510 wf->outstandingReqsRdLm++;
33511 wf->outstandingReqs++;
33512 wf->validateRequestCounters();
33513 }
33514
33515 void
33516 Inst_DS__DS_READ_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
33517 {
33518 Addr offset0 = instData.OFFSET0;
33519 Addr offset1 = instData.OFFSET1;
33520 Addr offset = (offset1 << 8) | offset0;
33521
33522 initMemRead<VecElemU64>(gpuDynInst, offset);
33523 } // initiateAcc
33524
33525 void
33526 Inst_DS__DS_READ_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33527 {
33528 VecOperandU64 vdst(gpuDynInst, extData.VDST);
33529
33530 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
33531 if (gpuDynInst->exec_mask[lane]) {
33532 vdst[lane] = (reinterpret_cast<VecElemU64*>(
33533 gpuDynInst->d_data))[lane];
33534 }
33535 }
33536
33537 vdst.write();
33538 } // completeAcc
33539
33540 Inst_DS__DS_READ2_B64::Inst_DS__DS_READ2_B64(InFmt_DS *iFmt)
33541 : Inst_DS(iFmt, "ds_read2_b64")
33542 {
33543 setFlag(MemoryRef);
33544 setFlag(Load);
33545 } // Inst_DS__DS_READ2_B64
33546
33547 Inst_DS__DS_READ2_B64::~Inst_DS__DS_READ2_B64()
33548 {
33549 } // ~Inst_DS__DS_READ2_B64
33550
33551 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8];
33552 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8].
33553 // Read 2 qwords.
33554 void
33555 Inst_DS__DS_READ2_B64::execute(GPUDynInstPtr gpuDynInst)
33556 {
33557 Wavefront *wf = gpuDynInst->wavefront();
33558 gpuDynInst->execUnitId = wf->execUnitId;
33559 gpuDynInst->exec_mask = wf->execMask();
33560 gpuDynInst->latency.init(gpuDynInst->computeUnit());
33561 gpuDynInst->latency.set(
33562 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
33563 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
33564
33565 addr.read();
33566
33567 calcAddr(gpuDynInst, addr);
33568
33569 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
33570
33571 wf->rdLmReqsInPipe--;
33572 wf->outstandingReqsRdLm++;
33573 wf->outstandingReqs++;
33574 wf->validateRequestCounters();
33575 }
33576
33577 void
33578 Inst_DS__DS_READ2_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
33579 {
33580 Addr offset0 = instData.OFFSET0 * 8;
33581 Addr offset1 = instData.OFFSET1 * 8;
33582
33583 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
33584 } // initiateAcc
33585
33586 void
33587 Inst_DS__DS_READ2_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33588 {
33589 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
33590 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
33591
33592 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
33593 if (gpuDynInst->exec_mask[lane]) {
33594 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
33595 gpuDynInst->d_data))[lane * 2];
33596 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
33597 gpuDynInst->d_data))[lane * 2 + 1];
33598 }
33599 }
33600
33601 vdst0.write();
33602 vdst1.write();
33603 } // completeAcc
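
    /**
     * Illustrative note, not part of the simulator: each 64-bit result
     * occupies two consecutive dword VGPRs, which is why the second
     * qword above is written at VDST + 2 rather than VDST + 1 (contrast
     * ds_read2_b32, whose second dword goes to VDST + 1).
     */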
33604
33605 Inst_DS__DS_READ2ST64_B64::Inst_DS__DS_READ2ST64_B64(InFmt_DS *iFmt)
33606 : Inst_DS(iFmt, "ds_read2st64_b64")
33607 {
33608 setFlag(MemoryRef);
33609 setFlag(Load);
33610 } // Inst_DS__DS_READ2ST64_B64
33611
33612 Inst_DS__DS_READ2ST64_B64::~Inst_DS__DS_READ2ST64_B64()
33613 {
33614 } // ~Inst_DS__DS_READ2ST64_B64
33615
33616 // RETURN_DATA[0] = MEM[ADDR_BASE + OFFSET0 * 8 * 64];
33617 // RETURN_DATA[1] = MEM[ADDR_BASE + OFFSET1 * 8 * 64].
33618 // Read 2 qwords.
33619 void
33620 Inst_DS__DS_READ2ST64_B64::execute(GPUDynInstPtr gpuDynInst)
33621 {
33622 Wavefront *wf = gpuDynInst->wavefront();
33623 gpuDynInst->execUnitId = wf->execUnitId;
33624 gpuDynInst->exec_mask = wf->execMask();
33625 gpuDynInst->latency.init(gpuDynInst->computeUnit());
33626 gpuDynInst->latency.set(
33627 gpuDynInst->computeUnit()->cyclesToTicks(Cycles(24)));
33628 ConstVecOperandU32 addr(gpuDynInst, extData.ADDR);
33629
33630 addr.read();
33631
33632 calcAddr(gpuDynInst, addr);
33633
33634 gpuDynInst->computeUnit()->localMemoryPipe.issueRequest(gpuDynInst);
33635
33636 wf->rdLmReqsInPipe--;
33637 wf->outstandingReqsRdLm++;
33638 wf->outstandingReqs++;
33639 wf->validateRequestCounters();
33640 }
33641
33642 void
33643 Inst_DS__DS_READ2ST64_B64::initiateAcc(GPUDynInstPtr gpuDynInst)
33644 {
33645 Addr offset0 = (instData.OFFSET0 * 8 * 64);
33646 Addr offset1 = (instData.OFFSET1 * 8 * 64);
33647
33648 initDualMemRead<VecElemU64>(gpuDynInst, offset0, offset1);
33649 }
33650
33651 void
33652 Inst_DS__DS_READ2ST64_B64::completeAcc(GPUDynInstPtr gpuDynInst)
33653 {
33654 VecOperandU64 vdst0(gpuDynInst, extData.VDST);
33655 VecOperandU64 vdst1(gpuDynInst, extData.VDST + 2);
33656
33657 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
33658 if (gpuDynInst->exec_mask[lane]) {
33659 vdst0[lane] = (reinterpret_cast<VecElemU64*>(
33660 gpuDynInst->d_data))[lane * 2];
33661 vdst1[lane] = (reinterpret_cast<VecElemU64*>(
33662 gpuDynInst->d_data))[lane * 2 + 1];
33663 }
33664 }
33665
33666 vdst0.write();
33667 vdst1.write();
33668 }
33669
33670 Inst_DS__DS_CONDXCHG32_RTN_B64::Inst_DS__DS_CONDXCHG32_RTN_B64(
33671 InFmt_DS *iFmt)
33672 : Inst_DS(iFmt, "ds_condxchg32_rtn_b64")
33673 {
33674 } // Inst_DS__DS_CONDXCHG32_RTN_B64
33675
33676 Inst_DS__DS_CONDXCHG32_RTN_B64::~Inst_DS__DS_CONDXCHG32_RTN_B64()
33677 {
33678 } // ~Inst_DS__DS_CONDXCHG32_RTN_B64
33679
33680 // Conditional write exchange.
33681 void
33682 Inst_DS__DS_CONDXCHG32_RTN_B64::execute(GPUDynInstPtr gpuDynInst)
33683 {
33684 panicUnimplemented();
33685 }
33686
33687 Inst_DS__DS_ADD_SRC2_U32::Inst_DS__DS_ADD_SRC2_U32(InFmt_DS *iFmt)
33688 : Inst_DS(iFmt, "ds_add_src2_u32")
33689 {
33690 } // Inst_DS__DS_ADD_SRC2_U32
33691
33692 Inst_DS__DS_ADD_SRC2_U32::~Inst_DS__DS_ADD_SRC2_U32()
33693 {
33694 } // ~Inst_DS__DS_ADD_SRC2_U32
33695
33696 // A = ADDR_BASE;
33697 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33698 // {offset1[6],offset1[6:0],offset0});
33699 // MEM[A] = MEM[A] + MEM[B].
33700 void
33701 Inst_DS__DS_ADD_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33702 {
33703 panicUnimplemented();
33704 }
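
    /**
     * Illustrative sketch, not part of the simulator: one way to decode
     * the Verilog-style concatenations in the SRC2 comments above into
     * the second address B. Whether the 16-bit result is sign-extended
     * is not specified here; the helper name is hypothetical.
     */
    static inline Addr
    src2SecondAddrSketch(Addr a, unsigned offset0, unsigned offset1)
    {
        Addr val;
        if (bits(offset1, 7)) {
            // {A[31],A[31:17]}: bit 31 of A followed by A[31:17]
            val = (bits(a, 31) << 15) | bits(a, 31, 17);
        } else {
            // {offset1[6],offset1[6:0],offset0}
            val = (bits(offset1, 6) << 15) |
                (bits(offset1, 6, 0) << 8) | offset0;
        }
        return a + 4 * val; // dword-scaled displacement
    }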
33705
33706 Inst_DS__DS_SUB_SRC2_U32::Inst_DS__DS_SUB_SRC2_U32(InFmt_DS *iFmt)
33707 : Inst_DS(iFmt, "ds_sub_src2_u32")
33708 {
33709 } // Inst_DS__DS_SUB_SRC2_U32
33710
33711 Inst_DS__DS_SUB_SRC2_U32::~Inst_DS__DS_SUB_SRC2_U32()
33712 {
33713 } // ~Inst_DS__DS_SUB_SRC2_U32
33714
33715 // A = ADDR_BASE;
33716 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33717 // {offset1[6],offset1[6:0],offset0});
33718 // MEM[A] = MEM[A] - MEM[B].
33719 void
33720 Inst_DS__DS_SUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33721 {
33722 panicUnimplemented();
33723 }
33724
33725 Inst_DS__DS_RSUB_SRC2_U32::Inst_DS__DS_RSUB_SRC2_U32(InFmt_DS *iFmt)
33726 : Inst_DS(iFmt, "ds_rsub_src2_u32")
33727 {
33728 } // Inst_DS__DS_RSUB_SRC2_U32
33729
33730 Inst_DS__DS_RSUB_SRC2_U32::~Inst_DS__DS_RSUB_SRC2_U32()
33731 {
33732 } // ~Inst_DS__DS_RSUB_SRC2_U32
33733
33734 // A = ADDR_BASE;
33735 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33736 // {offset1[6],offset1[6:0],offset0});
33737 // MEM[A] = MEM[B] - MEM[A].
33738 void
33739 Inst_DS__DS_RSUB_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33740 {
33741 panicUnimplemented();
33742 }
33743
33744 Inst_DS__DS_INC_SRC2_U32::Inst_DS__DS_INC_SRC2_U32(InFmt_DS *iFmt)
33745 : Inst_DS(iFmt, "ds_inc_src2_u32")
33746 {
33747 } // Inst_DS__DS_INC_SRC2_U32
33748
33749 Inst_DS__DS_INC_SRC2_U32::~Inst_DS__DS_INC_SRC2_U32()
33750 {
33751 } // ~Inst_DS__DS_INC_SRC2_U32
33752
33753 // A = ADDR_BASE;
33754 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33755 // {offset1[6],offset1[6:0],offset0});
33756 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
33757 void
33758 Inst_DS__DS_INC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33759 {
33760 panicUnimplemented();
33761 }
33762
33763 Inst_DS__DS_DEC_SRC2_U32::Inst_DS__DS_DEC_SRC2_U32(InFmt_DS *iFmt)
33764 : Inst_DS(iFmt, "ds_dec_src2_u32")
33765 {
33766 } // Inst_DS__DS_DEC_SRC2_U32
33767
33768 Inst_DS__DS_DEC_SRC2_U32::~Inst_DS__DS_DEC_SRC2_U32()
33769 {
33770 } // ~Inst_DS__DS_DEC_SRC2_U32
33771
33772 // A = ADDR_BASE;
33773 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33774 // {offset1[6],offset1[6:0],offset0});
33775 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
33776 // Uint decrement.
33777 void
33778 Inst_DS__DS_DEC_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33779 {
33780 panicUnimplemented();
33781 }
33782
33783 Inst_DS__DS_MIN_SRC2_I32::Inst_DS__DS_MIN_SRC2_I32(InFmt_DS *iFmt)
33784 : Inst_DS(iFmt, "ds_min_src2_i32")
33785 {
33786 } // Inst_DS__DS_MIN_SRC2_I32
33787
33788 Inst_DS__DS_MIN_SRC2_I32::~Inst_DS__DS_MIN_SRC2_I32()
33789 {
33790 } // ~Inst_DS__DS_MIN_SRC2_I32
33791
33792 // A = ADDR_BASE;
33793 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33794 // {offset1[6],offset1[6:0],offset0});
33795 // MEM[A] = min(MEM[A], MEM[B]).
33796 void
33797 Inst_DS__DS_MIN_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
33798 {
33799 panicUnimplemented();
33800 }
33801
33802 Inst_DS__DS_MAX_SRC2_I32::Inst_DS__DS_MAX_SRC2_I32(InFmt_DS *iFmt)
33803 : Inst_DS(iFmt, "ds_max_src2_i32")
33804 {
33805 } // Inst_DS__DS_MAX_SRC2_I32
33806
33807 Inst_DS__DS_MAX_SRC2_I32::~Inst_DS__DS_MAX_SRC2_I32()
33808 {
33809 } // ~Inst_DS__DS_MAX_SRC2_I32
33810
33811 // A = ADDR_BASE;
33812 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33813 // {offset1[6],offset1[6:0],offset0});
33814 // MEM[A] = max(MEM[A], MEM[B]).
33815 void
33816 Inst_DS__DS_MAX_SRC2_I32::execute(GPUDynInstPtr gpuDynInst)
33817 {
33818 panicUnimplemented();
33819 }
33820
33821 Inst_DS__DS_MIN_SRC2_U32::Inst_DS__DS_MIN_SRC2_U32(InFmt_DS *iFmt)
33822 : Inst_DS(iFmt, "ds_min_src2_u32")
33823 {
33824 } // Inst_DS__DS_MIN_SRC2_U32
33825
33826 Inst_DS__DS_MIN_SRC2_U32::~Inst_DS__DS_MIN_SRC2_U32()
33827 {
33828 } // ~Inst_DS__DS_MIN_SRC2_U32
33829
33830 // A = ADDR_BASE;
33831 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33832 // {offset1[6],offset1[6:0],offset0});
33833 // MEM[A] = min(MEM[A], MEM[B]).
33834 void
33835 Inst_DS__DS_MIN_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33836 {
33837 panicUnimplemented();
33838 }
33839
33840 Inst_DS__DS_MAX_SRC2_U32::Inst_DS__DS_MAX_SRC2_U32(InFmt_DS *iFmt)
33841 : Inst_DS(iFmt, "ds_max_src2_u32")
33842 {
33843 } // Inst_DS__DS_MAX_SRC2_U32
33844
33845 Inst_DS__DS_MAX_SRC2_U32::~Inst_DS__DS_MAX_SRC2_U32()
33846 {
33847 } // ~Inst_DS__DS_MAX_SRC2_U32
33848
33849 // A = ADDR_BASE;
33850 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33851 // {offset1[6],offset1[6:0],offset0});
33852 // MEM[A] = max(MEM[A], MEM[B]).
33853 void
33854 Inst_DS__DS_MAX_SRC2_U32::execute(GPUDynInstPtr gpuDynInst)
33855 {
33856 panicUnimplemented();
33857 }
33858
33859 Inst_DS__DS_AND_SRC2_B32::Inst_DS__DS_AND_SRC2_B32(InFmt_DS *iFmt)
33860 : Inst_DS(iFmt, "ds_and_src2_b32")
33861 {
33862 } // Inst_DS__DS_AND_SRC2_B32
33863
33864 Inst_DS__DS_AND_SRC2_B32::~Inst_DS__DS_AND_SRC2_B32()
33865 {
33866 } // ~Inst_DS__DS_AND_SRC2_B32
33867
33868 // A = ADDR_BASE;
33869 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33870 // {offset1[6],offset1[6:0],offset0});
33871 // MEM[A] = MEM[A] & MEM[B].
33872 void
33873 Inst_DS__DS_AND_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33874 {
33875 panicUnimplemented();
33876 }
33877
33878 Inst_DS__DS_OR_SRC2_B32::Inst_DS__DS_OR_SRC2_B32(InFmt_DS *iFmt)
33879 : Inst_DS(iFmt, "ds_or_src2_b32")
33880 {
33881 } // Inst_DS__DS_OR_SRC2_B32
33882
33883 Inst_DS__DS_OR_SRC2_B32::~Inst_DS__DS_OR_SRC2_B32()
33884 {
33885 } // ~Inst_DS__DS_OR_SRC2_B32
33886
33887 // A = ADDR_BASE;
33888 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33889 // {offset1[6],offset1[6:0],offset0});
33890 // MEM[A] = MEM[A] | MEM[B].
33891 void
33892 Inst_DS__DS_OR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33893 {
33894 panicUnimplemented();
33895 }
33896
33897 Inst_DS__DS_XOR_SRC2_B32::Inst_DS__DS_XOR_SRC2_B32(InFmt_DS *iFmt)
33898 : Inst_DS(iFmt, "ds_xor_src2_b32")
33899 {
33900 } // Inst_DS__DS_XOR_SRC2_B32
33901
33902 Inst_DS__DS_XOR_SRC2_B32::~Inst_DS__DS_XOR_SRC2_B32()
33903 {
33904 } // ~Inst_DS__DS_XOR_SRC2_B32
33905
33906 // A = ADDR_BASE;
33907 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33908 // {offset1[6],offset1[6:0],offset0});
33909 // MEM[A] = MEM[A] ^ MEM[B].
33910 void
33911 Inst_DS__DS_XOR_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33912 {
33913 panicUnimplemented();
33914 }
33915
33916 Inst_DS__DS_WRITE_SRC2_B32::Inst_DS__DS_WRITE_SRC2_B32(InFmt_DS *iFmt)
33917 : Inst_DS(iFmt, "ds_write_src2_b32")
33918 {
33919 setFlag(MemoryRef);
33920 setFlag(Store);
33921 } // Inst_DS__DS_WRITE_SRC2_B32
33922
33923 Inst_DS__DS_WRITE_SRC2_B32::~Inst_DS__DS_WRITE_SRC2_B32()
33924 {
33925 } // ~Inst_DS__DS_WRITE_SRC2_B32
33926
33927 // A = ADDR_BASE;
33928 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33929 // {offset1[6],offset1[6:0],offset0});
33930 // MEM[A] = MEM[B].
33931 // Write dword.
33932 void
33933 Inst_DS__DS_WRITE_SRC2_B32::execute(GPUDynInstPtr gpuDynInst)
33934 {
33935 panicUnimplemented();
33936 }
33937
33938 Inst_DS__DS_MIN_SRC2_F32::Inst_DS__DS_MIN_SRC2_F32(InFmt_DS *iFmt)
33939 : Inst_DS(iFmt, "ds_min_src2_f32")
33940 {
33941 setFlag(F32);
33942 } // Inst_DS__DS_MIN_SRC2_F32
33943
33944 Inst_DS__DS_MIN_SRC2_F32::~Inst_DS__DS_MIN_SRC2_F32()
33945 {
33946 } // ~Inst_DS__DS_MIN_SRC2_F32
33947
33948 // A = ADDR_BASE;
33949 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33950 // {offset1[6],offset1[6:0],offset0});
33951 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
33952 void
33953 Inst_DS__DS_MIN_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
33954 {
33955 panicUnimplemented();
33956 }
33957
33958 Inst_DS__DS_MAX_SRC2_F32::Inst_DS__DS_MAX_SRC2_F32(InFmt_DS *iFmt)
33959 : Inst_DS(iFmt, "ds_max_src2_f32")
33960 {
33961 setFlag(F32);
33962 } // Inst_DS__DS_MAX_SRC2_F32
33963
33964 Inst_DS__DS_MAX_SRC2_F32::~Inst_DS__DS_MAX_SRC2_F32()
33965 {
33966 } // ~Inst_DS__DS_MAX_SRC2_F32
33967
33968 // A = ADDR_BASE;
33969 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33970 // {offset1[6],offset1[6:0],offset0});
33971 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
33972 void
33973 Inst_DS__DS_MAX_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
33974 {
33975 panicUnimplemented();
33976 }
33977
33978 Inst_DS__DS_ADD_SRC2_F32::Inst_DS__DS_ADD_SRC2_F32(InFmt_DS *iFmt)
33979 : Inst_DS(iFmt, "ds_add_src2_f32")
33980 {
33981 setFlag(F32);
33982 } // Inst_DS__DS_ADD_SRC2_F32
33983
33984 Inst_DS__DS_ADD_SRC2_F32::~Inst_DS__DS_ADD_SRC2_F32()
33985 {
33986 } // ~Inst_DS__DS_ADD_SRC2_F32
33987
33988 // A = ADDR_BASE;
33989 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
33990 // {offset1[6],offset1[6:0],offset0});
33991 // MEM[A] = MEM[B] + MEM[A].
33992 void
33993 Inst_DS__DS_ADD_SRC2_F32::execute(GPUDynInstPtr gpuDynInst)
33994 {
33995 panicUnimplemented();
33996 }
33997
33998 Inst_DS__DS_GWS_SEMA_RELEASE_ALL::Inst_DS__DS_GWS_SEMA_RELEASE_ALL(
33999 InFmt_DS *iFmt)
34000 : Inst_DS(iFmt, "ds_gws_sema_release_all")
34001 {
34002 } // Inst_DS__DS_GWS_SEMA_RELEASE_ALL
34003
34004 Inst_DS__DS_GWS_SEMA_RELEASE_ALL::~Inst_DS__DS_GWS_SEMA_RELEASE_ALL()
34005 {
34006 } // ~Inst_DS__DS_GWS_SEMA_RELEASE_ALL
34007
34008 void
34009 Inst_DS__DS_GWS_SEMA_RELEASE_ALL::execute(GPUDynInstPtr gpuDynInst)
34010 {
34011 panicUnimplemented();
34012 }
34013
34014 Inst_DS__DS_GWS_INIT::Inst_DS__DS_GWS_INIT(InFmt_DS *iFmt)
34015 : Inst_DS(iFmt, "ds_gws_init")
34016 {
34017 } // Inst_DS__DS_GWS_INIT
34018
34019 Inst_DS__DS_GWS_INIT::~Inst_DS__DS_GWS_INIT()
34020 {
34021 } // ~Inst_DS__DS_GWS_INIT
34022
34023 void
34024 Inst_DS__DS_GWS_INIT::execute(GPUDynInstPtr gpuDynInst)
34025 {
34026 panicUnimplemented();
34027 }
34028
34029 Inst_DS__DS_GWS_SEMA_V::Inst_DS__DS_GWS_SEMA_V(InFmt_DS *iFmt)
34030 : Inst_DS(iFmt, "ds_gws_sema_v")
34031 {
34032 } // Inst_DS__DS_GWS_SEMA_V
34033
34034 Inst_DS__DS_GWS_SEMA_V::~Inst_DS__DS_GWS_SEMA_V()
34035 {
34036 } // ~Inst_DS__DS_GWS_SEMA_V
34037
34038 void
34039 Inst_DS__DS_GWS_SEMA_V::execute(GPUDynInstPtr gpuDynInst)
34040 {
34041 panicUnimplemented();
34042 }
34043
34044 Inst_DS__DS_GWS_SEMA_BR::Inst_DS__DS_GWS_SEMA_BR(InFmt_DS *iFmt)
34045 : Inst_DS(iFmt, "ds_gws_sema_br")
34046 {
34047 } // Inst_DS__DS_GWS_SEMA_BR
34048
34049 Inst_DS__DS_GWS_SEMA_BR::~Inst_DS__DS_GWS_SEMA_BR()
34050 {
34051 } // ~Inst_DS__DS_GWS_SEMA_BR
34052
34053 void
34054 Inst_DS__DS_GWS_SEMA_BR::execute(GPUDynInstPtr gpuDynInst)
34055 {
34056 panicUnimplemented();
34057 }
34058
34059 Inst_DS__DS_GWS_SEMA_P::Inst_DS__DS_GWS_SEMA_P(InFmt_DS *iFmt)
34060 : Inst_DS(iFmt, "ds_gws_sema_p")
34061 {
34062 } // Inst_DS__DS_GWS_SEMA_P
34063
34064 Inst_DS__DS_GWS_SEMA_P::~Inst_DS__DS_GWS_SEMA_P()
34065 {
34066 } // ~Inst_DS__DS_GWS_SEMA_P
34067
34068 void
34069 Inst_DS__DS_GWS_SEMA_P::execute(GPUDynInstPtr gpuDynInst)
34070 {
34071 panicUnimplemented();
34072 }
34073
34074 Inst_DS__DS_GWS_BARRIER::Inst_DS__DS_GWS_BARRIER(InFmt_DS *iFmt)
34075 : Inst_DS(iFmt, "ds_gws_barrier")
34076 {
34077 } // Inst_DS__DS_GWS_BARRIER
34078
34079 Inst_DS__DS_GWS_BARRIER::~Inst_DS__DS_GWS_BARRIER()
34080 {
34081 } // ~Inst_DS__DS_GWS_BARRIER
34082
34083 void
34084 Inst_DS__DS_GWS_BARRIER::execute(GPUDynInstPtr gpuDynInst)
34085 {
34086 panicUnimplemented();
34087 }
34088
34089 Inst_DS__DS_CONSUME::Inst_DS__DS_CONSUME(InFmt_DS *iFmt)
34090 : Inst_DS(iFmt, "ds_consume")
34091 {
34092 } // Inst_DS__DS_CONSUME
34093
34094 Inst_DS__DS_CONSUME::~Inst_DS__DS_CONSUME()
34095 {
34096 } // ~Inst_DS__DS_CONSUME
34097
34098 void
34099 Inst_DS__DS_CONSUME::execute(GPUDynInstPtr gpuDynInst)
34100 {
34101 panicUnimplemented();
34102 }
34103
34104 Inst_DS__DS_APPEND::Inst_DS__DS_APPEND(InFmt_DS *iFmt)
34105 : Inst_DS(iFmt, "ds_append")
34106 {
34107 } // Inst_DS__DS_APPEND
34108
34109 Inst_DS__DS_APPEND::~Inst_DS__DS_APPEND()
34110 {
34111 } // ~Inst_DS__DS_APPEND
34112
34113 void
34114 Inst_DS__DS_APPEND::execute(GPUDynInstPtr gpuDynInst)
34115 {
34116 panicUnimplemented();
34117 }
34118
34119 Inst_DS__DS_ORDERED_COUNT::Inst_DS__DS_ORDERED_COUNT(InFmt_DS *iFmt)
34120 : Inst_DS(iFmt, "ds_ordered_count")
34121 {
34122 } // Inst_DS__DS_ORDERED_COUNT
34123
34124 Inst_DS__DS_ORDERED_COUNT::~Inst_DS__DS_ORDERED_COUNT()
34125 {
34126 } // ~Inst_DS__DS_ORDERED_COUNT
34127
34128 void
34129 Inst_DS__DS_ORDERED_COUNT::execute(GPUDynInstPtr gpuDynInst)
34130 {
34131 panicUnimplemented();
34132 }
34133
34134 Inst_DS__DS_ADD_SRC2_U64::Inst_DS__DS_ADD_SRC2_U64(InFmt_DS *iFmt)
34135 : Inst_DS(iFmt, "ds_add_src2_u64")
34136 {
34137 } // Inst_DS__DS_ADD_SRC2_U64
34138
34139 Inst_DS__DS_ADD_SRC2_U64::~Inst_DS__DS_ADD_SRC2_U64()
34140 {
34141 } // ~Inst_DS__DS_ADD_SRC2_U64
34142
34143 // A = ADDR_BASE;
34144 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34145 // {offset1[6],offset1[6:0],offset0});
34146 // MEM[A] = MEM[A] + MEM[B].
34147 void
34148 Inst_DS__DS_ADD_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34149 {
34150 panicUnimplemented();
34151 }
34152
34153 Inst_DS__DS_SUB_SRC2_U64::Inst_DS__DS_SUB_SRC2_U64(InFmt_DS *iFmt)
34154 : Inst_DS(iFmt, "ds_sub_src2_u64")
34155 {
34156 } // Inst_DS__DS_SUB_SRC2_U64
34157
34158 Inst_DS__DS_SUB_SRC2_U64::~Inst_DS__DS_SUB_SRC2_U64()
34159 {
34160 } // ~Inst_DS__DS_SUB_SRC2_U64
34161
34162 // A = ADDR_BASE;
34163 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34164 // {offset1[6],offset1[6:0],offset0});
34165 // MEM[A] = MEM[A] - MEM[B].
34166 void
34167 Inst_DS__DS_SUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34168 {
34169 panicUnimplemented();
34170 }
34171
34172 Inst_DS__DS_RSUB_SRC2_U64::Inst_DS__DS_RSUB_SRC2_U64(InFmt_DS *iFmt)
34173 : Inst_DS(iFmt, "ds_rsub_src2_u64")
34174 {
34175 } // Inst_DS__DS_RSUB_SRC2_U64
34176
34177 Inst_DS__DS_RSUB_SRC2_U64::~Inst_DS__DS_RSUB_SRC2_U64()
34178 {
34179 } // ~Inst_DS__DS_RSUB_SRC2_U64
34180
34181 // A = ADDR_BASE;
34182 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34183 // {offset1[6],offset1[6:0],offset0});
34184 // MEM[A] = MEM[B] - MEM[A].
34185 void
34186 Inst_DS__DS_RSUB_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34187 {
34188 panicUnimplemented();
34189 }
34190
34191 Inst_DS__DS_INC_SRC2_U64::Inst_DS__DS_INC_SRC2_U64(InFmt_DS *iFmt)
34192 : Inst_DS(iFmt, "ds_inc_src2_u64")
34193 {
34194 } // Inst_DS__DS_INC_SRC2_U64
34195
34196 Inst_DS__DS_INC_SRC2_U64::~Inst_DS__DS_INC_SRC2_U64()
34197 {
34198 } // ~Inst_DS__DS_INC_SRC2_U64
34199
34200 // A = ADDR_BASE;
34201 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34202 // {offset1[6],offset1[6:0],offset0});
34203 // MEM[A] = (MEM[A] >= MEM[B] ? 0 : MEM[A] + 1).
34204 void
34205 Inst_DS__DS_INC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34206 {
34207 panicUnimplemented();
34208 }
34209
34210 Inst_DS__DS_DEC_SRC2_U64::Inst_DS__DS_DEC_SRC2_U64(InFmt_DS *iFmt)
34211 : Inst_DS(iFmt, "ds_dec_src2_u64")
34212 {
34213 } // Inst_DS__DS_DEC_SRC2_U64
34214
34215 Inst_DS__DS_DEC_SRC2_U64::~Inst_DS__DS_DEC_SRC2_U64()
34216 {
34217 } // ~Inst_DS__DS_DEC_SRC2_U64
34218
34219 // A = ADDR_BASE;
34220 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34221 // {offset1[6],offset1[6:0],offset0});
34222 // MEM[A] = (MEM[A] == 0 || MEM[A] > MEM[B] ? MEM[B] : MEM[A] - 1).
34223 // Uint decrement.
34224 void
34225 Inst_DS__DS_DEC_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34226 {
34227 panicUnimplemented();
34228 }
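    // Editor's sketch (not gem5 code): the wrapping update the inc/dec
    // pseudocode above describes, written out for 64-bit values; <cstdint>
    // is assumed and the helper names are illustrative only.
    static uint64_t
    wrappingInc(uint64_t val, uint64_t limit)
    {
        return val >= limit ? 0 : val + 1;   // wrap to 0 at the limit
    }

    static uint64_t
    wrappingDec(uint64_t val, uint64_t limit)
    {
        // reload the limit when decrementing from 0 or from above it
        return (val == 0 || val > limit) ? limit : val - 1;
    }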
34229
34230 Inst_DS__DS_MIN_SRC2_I64::Inst_DS__DS_MIN_SRC2_I64(InFmt_DS *iFmt)
34231 : Inst_DS(iFmt, "ds_min_src2_i64")
34232 {
34233 } // Inst_DS__DS_MIN_SRC2_I64
34234
34235 Inst_DS__DS_MIN_SRC2_I64::~Inst_DS__DS_MIN_SRC2_I64()
34236 {
34237 } // ~Inst_DS__DS_MIN_SRC2_I64
34238
34239 // A = ADDR_BASE;
34240 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34241 // {offset1[6],offset1[6:0],offset0});
34242 // MEM[A] = min(MEM[A], MEM[B]).
34243 void
34244 Inst_DS__DS_MIN_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
34245 {
34246 panicUnimplemented();
34247 }
34248
34249 Inst_DS__DS_MAX_SRC2_I64::Inst_DS__DS_MAX_SRC2_I64(InFmt_DS *iFmt)
34250 : Inst_DS(iFmt, "ds_max_src2_i64")
34251 {
34252 } // Inst_DS__DS_MAX_SRC2_I64
34253
34254 Inst_DS__DS_MAX_SRC2_I64::~Inst_DS__DS_MAX_SRC2_I64()
34255 {
34256 } // ~Inst_DS__DS_MAX_SRC2_I64
34257
34258 // A = ADDR_BASE;
34259 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34260 // {offset1[6],offset1[6:0],offset0});
34261 // MEM[A] = max(MEM[A], MEM[B]).
34262 void
34263 Inst_DS__DS_MAX_SRC2_I64::execute(GPUDynInstPtr gpuDynInst)
34264 {
34265 panicUnimplemented();
34266 }
34267
34268 Inst_DS__DS_MIN_SRC2_U64::Inst_DS__DS_MIN_SRC2_U64(InFmt_DS *iFmt)
34269 : Inst_DS(iFmt, "ds_min_src2_u64")
34270 {
34271 } // Inst_DS__DS_MIN_SRC2_U64
34272
34273 Inst_DS__DS_MIN_SRC2_U64::~Inst_DS__DS_MIN_SRC2_U64()
34274 {
34275 } // ~Inst_DS__DS_MIN_SRC2_U64
34276
34277 // A = ADDR_BASE;
34278 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34279 // {offset1[6],offset1[6:0],offset0});
34280 // MEM[A] = min(MEM[A], MEM[B]).
34281 void
34282 Inst_DS__DS_MIN_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34283 {
34284 panicUnimplemented();
34285 }
34286
34287 Inst_DS__DS_MAX_SRC2_U64::Inst_DS__DS_MAX_SRC2_U64(InFmt_DS *iFmt)
34288 : Inst_DS(iFmt, "ds_max_src2_u64")
34289 {
34290 } // Inst_DS__DS_MAX_SRC2_U64
34291
34292 Inst_DS__DS_MAX_SRC2_U64::~Inst_DS__DS_MAX_SRC2_U64()
34293 {
34294 } // ~Inst_DS__DS_MAX_SRC2_U64
34295
34296 // A = ADDR_BASE;
34297 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34298 // {offset1[6],offset1[6:0],offset0});
34299 // MEM[A] = max(MEM[A], MEM[B]).
34300 void
34301 Inst_DS__DS_MAX_SRC2_U64::execute(GPUDynInstPtr gpuDynInst)
34302 {
34303 panicUnimplemented();
34304 }
34305
34306 Inst_DS__DS_AND_SRC2_B64::Inst_DS__DS_AND_SRC2_B64(InFmt_DS *iFmt)
34307 : Inst_DS(iFmt, "ds_and_src2_b64")
34308 {
34309 } // Inst_DS__DS_AND_SRC2_B64
34310
34311 Inst_DS__DS_AND_SRC2_B64::~Inst_DS__DS_AND_SRC2_B64()
34312 {
34313 } // ~Inst_DS__DS_AND_SRC2_B64
34314
34315 // A = ADDR_BASE;
34316 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34317 // {offset1[6],offset1[6:0],offset0});
34318 // MEM[A] = MEM[A] & MEM[B].
34319 void
34320 Inst_DS__DS_AND_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34321 {
34322 panicUnimplemented();
34323 }
34324
34325 Inst_DS__DS_OR_SRC2_B64::Inst_DS__DS_OR_SRC2_B64(InFmt_DS *iFmt)
34326 : Inst_DS(iFmt, "ds_or_src2_b64")
34327 {
34328 } // Inst_DS__DS_OR_SRC2_B64
34329
34330 Inst_DS__DS_OR_SRC2_B64::~Inst_DS__DS_OR_SRC2_B64()
34331 {
34332 } // ~Inst_DS__DS_OR_SRC2_B64
34333
34334 // A = ADDR_BASE;
34335 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34336 // {offset1[6],offset1[6:0],offset0});
34337 // MEM[A] = MEM[A] | MEM[B].
34338 void
34339 Inst_DS__DS_OR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34340 {
34341 panicUnimplemented();
34342 }
34343
34344 Inst_DS__DS_XOR_SRC2_B64::Inst_DS__DS_XOR_SRC2_B64(InFmt_DS *iFmt)
34345 : Inst_DS(iFmt, "ds_xor_src2_b64")
34346 {
34347 } // Inst_DS__DS_XOR_SRC2_B64
34348
34349 Inst_DS__DS_XOR_SRC2_B64::~Inst_DS__DS_XOR_SRC2_B64()
34350 {
34351 } // ~Inst_DS__DS_XOR_SRC2_B64
34352
34353 // A = ADDR_BASE;
34354 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34355 // {offset1[6],offset1[6:0],offset0});
34356 // MEM[A] = MEM[A] ^ MEM[B].
34357 void
34358 Inst_DS__DS_XOR_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34359 {
34360 panicUnimplemented();
34361 }
34362
34363 Inst_DS__DS_WRITE_SRC2_B64::Inst_DS__DS_WRITE_SRC2_B64(InFmt_DS *iFmt)
34364 : Inst_DS(iFmt, "ds_write_src2_b64")
34365 {
34366 setFlag(MemoryRef);
34367 setFlag(Store);
34368 } // Inst_DS__DS_WRITE_SRC2_B64
34369
34370 Inst_DS__DS_WRITE_SRC2_B64::~Inst_DS__DS_WRITE_SRC2_B64()
34371 {
34372 } // ~Inst_DS__DS_WRITE_SRC2_B64
34373
34374 // A = ADDR_BASE;
34375 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34376 // {offset1[6],offset1[6:0],offset0});
34377 // MEM[A] = MEM[B].
34378 // Write qword.
34379 void
34380 Inst_DS__DS_WRITE_SRC2_B64::execute(GPUDynInstPtr gpuDynInst)
34381 {
34382 panicUnimplemented();
34383 }
34384
34385 Inst_DS__DS_MIN_SRC2_F64::Inst_DS__DS_MIN_SRC2_F64(InFmt_DS *iFmt)
34386 : Inst_DS(iFmt, "ds_min_src2_f64")
34387 {
34388 setFlag(F64);
34389 } // Inst_DS__DS_MIN_SRC2_F64
34390
34391 Inst_DS__DS_MIN_SRC2_F64::~Inst_DS__DS_MIN_SRC2_F64()
34392 {
34393 } // ~Inst_DS__DS_MIN_SRC2_F64
34394
34395 // A = ADDR_BASE;
34396 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34397 // {offset1[6],offset1[6:0],offset0});
34398 // MEM[A] = (MEM[B] < MEM[A]) ? MEM[B] : MEM[A].
34399 void
34400 Inst_DS__DS_MIN_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
34401 {
34402 panicUnimplemented();
34403 }
34404
34405 Inst_DS__DS_MAX_SRC2_F64::Inst_DS__DS_MAX_SRC2_F64(InFmt_DS *iFmt)
34406 : Inst_DS(iFmt, "ds_max_src2_f64")
34407 {
34408 setFlag(F64);
34409 } // Inst_DS__DS_MAX_SRC2_F64
34410
34411 Inst_DS__DS_MAX_SRC2_F64::~Inst_DS__DS_MAX_SRC2_F64()
34412 {
34413 } // ~Inst_DS__DS_MAX_SRC2_F64
34414
34415 // A = ADDR_BASE;
34416 // B = A + 4*(offset1[7] ? {A[31],A[31:17]} :
34417 // {offset1[6],offset1[6:0],offset0});
34418 // MEM[A] = (MEM[B] > MEM[A]) ? MEM[B] : MEM[A].
34419 void
34420 Inst_DS__DS_MAX_SRC2_F64::execute(GPUDynInstPtr gpuDynInst)
34421 {
34422 panicUnimplemented();
34423 }
34424
34425 Inst_DS__DS_WRITE_B96::Inst_DS__DS_WRITE_B96(InFmt_DS *iFmt)
34426 : Inst_DS(iFmt, "ds_write_b96")
34427 {
34428 setFlag(MemoryRef);
34429 setFlag(Store);
34430 } // Inst_DS__DS_WRITE_B96
34431
34432 Inst_DS__DS_WRITE_B96::~Inst_DS__DS_WRITE_B96()
34433 {
34434 } // ~Inst_DS__DS_WRITE_B96
34435
34436 // {MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[95:0].
34437 // Tri-dword write.
34438 void
34439 Inst_DS__DS_WRITE_B96::execute(GPUDynInstPtr gpuDynInst)
34440 {
34441 panicUnimplemented();
34442 }
34443
34444 Inst_DS__DS_WRITE_B128::Inst_DS__DS_WRITE_B128(InFmt_DS *iFmt)
34445 : Inst_DS(iFmt, "ds_write_b128")
34446 {
34447 setFlag(MemoryRef);
34448 setFlag(Store);
34449 } // Inst_DS__DS_WRITE_B128
34450
34451 Inst_DS__DS_WRITE_B128::~Inst_DS__DS_WRITE_B128()
34452 {
34453 } // ~Inst_DS__DS_WRITE_B128
34454
34455 // {MEM[ADDR + 12], MEM[ADDR + 8], MEM[ADDR + 4], MEM[ADDR]} = DATA[127:0].
34456 // Quad-dword write.
34457 void
34458 Inst_DS__DS_WRITE_B128::execute(GPUDynInstPtr gpuDynInst)
34459 {
34460 panicUnimplemented();
34461 }
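    // Editor's sketch (not gem5 code): the dword scatter the b96/b128
    // pseudocode above describes, with mem treated as a dword array indexed
    // by byte address / 4; <cstdint> is assumed, names are illustrative.
    static void
    writeDwords(uint32_t *mem, uint32_t addr, const uint32_t *data, int n)
    {
        // n == 3 for ds_write_b96 (DATA[95:0]), n == 4 for ds_write_b128
        // (DATA[127:0]); dword i lands at byte address addr + 4 * i.
        for (int i = 0; i < n; ++i)
            mem[addr / 4 + i] = data[i];
    }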
34462
34463 Inst_DS__DS_READ_B96::Inst_DS__DS_READ_B96(InFmt_DS *iFmt)
34464 : Inst_DS(iFmt, "ds_read_b96")
34465 {
34466 setFlag(MemoryRef);
34467 setFlag(Load);
34468 } // Inst_DS__DS_READ_B96
34469
34470 Inst_DS__DS_READ_B96::~Inst_DS__DS_READ_B96()
34471 {
34472 } // ~Inst_DS__DS_READ_B96
34473
34474 // Tri-dword read.
34475 void
34476 Inst_DS__DS_READ_B96::execute(GPUDynInstPtr gpuDynInst)
34477 {
34478 panicUnimplemented();
34479 }
34480
34481 Inst_DS__DS_READ_B128::Inst_DS__DS_READ_B128(InFmt_DS *iFmt)
34482 : Inst_DS(iFmt, "ds_read_b128")
34483 {
34484 setFlag(MemoryRef);
34485 setFlag(Load);
34486 } // Inst_DS__DS_READ_B128
34487
34488 Inst_DS__DS_READ_B128::~Inst_DS__DS_READ_B128()
34489 {
34490 } // ~Inst_DS__DS_READ_B128
34491
34492 // Quad-dword read.
34493 void
34494 Inst_DS__DS_READ_B128::execute(GPUDynInstPtr gpuDynInst)
34495 {
34496 panicUnimplemented();
34497 }
34498
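    // The buffer_{load,store}_format_* family below moves data through the
    // data-format conversion path; none of it is implemented here, and the
    // initiateAcc()/completeAcc() hooks are left as empty stubs.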
34499 Inst_MUBUF__BUFFER_LOAD_FORMAT_X
34500 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_X(InFmt_MUBUF *iFmt)
34501 : Inst_MUBUF(iFmt, "buffer_load_format_x")
34502 {
34503 setFlag(MemoryRef);
34504 setFlag(Load);
34505 setFlag(GlobalSegment);
34506 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_X
34507
34508 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::~Inst_MUBUF__BUFFER_LOAD_FORMAT_X()
34509 {
34510 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_X
34511
34512 // Untyped buffer load 1 dword with format conversion.
34513 void
34514 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
34515 {
34516 panicUnimplemented();
34517 }
34518
34519 void
34520 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
34521 {
34522 } // initiateAcc
34523
34524 void
34525 Inst_MUBUF__BUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
34526 {
34527 }
34528
34529 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
34530 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XY(InFmt_MUBUF *iFmt)
34531 : Inst_MUBUF(iFmt, "buffer_load_format_xy")
34532 {
34533 setFlag(MemoryRef);
34534 setFlag(Load);
34535 setFlag(GlobalSegment);
34536 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
34537
34538 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY()
34539 {
34540 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XY
34541
34542 // Untyped buffer load 2 dwords with format conversion.
34543 void
34544 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
34545 {
34546 panicUnimplemented();
34547 }
34548
34549 void
34550 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
34551 {
34552 } // initiateAcc
34553
34554 void
34555 Inst_MUBUF__BUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
34556 {
34557 }
34558
34559 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
34560 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ(InFmt_MUBUF *iFmt)
34561 : Inst_MUBUF(iFmt, "buffer_load_format_xyz")
34562 {
34563 setFlag(MemoryRef);
34564 setFlag(Load);
34565 setFlag(GlobalSegment);
34566 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
34567
34568 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ()
34569 {
34570 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ
34571
34572 // Untyped buffer load 3 dwords with format conversion.
34573 void
34574 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
34575 {
34576 panicUnimplemented();
34577 }
34578
34579 void
34580 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
34581 {
34582 } // initiateAcc
34583
34584 void
34585 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
34586 {
34587 }
34588
34589 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
34590 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW(InFmt_MUBUF *iFmt)
34591 : Inst_MUBUF(iFmt, "buffer_load_format_xyzw")
34592 {
34593 setFlag(MemoryRef);
34594 setFlag(Load);
34595 setFlag(GlobalSegment);
34596 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
34597
34598 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW()
34599 {
34600 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW
34601
34602 // Untyped buffer load 4 dwords with format conversion.
34603 void
34604 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
34605 {
34606 panicUnimplemented();
34607 }
34608
34609 void
34610 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
34611 {
34612 } // initiateAcc
34613
34614 void
34615 Inst_MUBUF__BUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
34616 {
34617 }
34618
34619 Inst_MUBUF__BUFFER_STORE_FORMAT_X
34620 ::Inst_MUBUF__BUFFER_STORE_FORMAT_X(InFmt_MUBUF *iFmt)
34621 : Inst_MUBUF(iFmt, "buffer_store_format_x")
34622 {
34623 setFlag(MemoryRef);
34624 setFlag(Store);
34625 setFlag(GlobalSegment);
34626 } // Inst_MUBUF__BUFFER_STORE_FORMAT_X
34627
34628 Inst_MUBUF__BUFFER_STORE_FORMAT_X::~Inst_MUBUF__BUFFER_STORE_FORMAT_X()
34629 {
34630 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_X
34631
34632 // Untyped buffer store 1 dword with format conversion.
34633 void
34634 Inst_MUBUF__BUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
34635 {
34636 panicUnimplemented();
34637 }
34638
34639 void
34640 Inst_MUBUF__BUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
34641 {
34642 } // initiateAcc
34643
34644 void
34645 Inst_MUBUF__BUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
34646 {
34647 }
34648
34649 Inst_MUBUF__BUFFER_STORE_FORMAT_XY
34650 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XY(InFmt_MUBUF *iFmt)
34651 : Inst_MUBUF(iFmt, "buffer_store_format_xy")
34652 {
34653 setFlag(MemoryRef);
34654 setFlag(Store);
34655 setFlag(GlobalSegment);
34656 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XY
34657
34658 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::~Inst_MUBUF__BUFFER_STORE_FORMAT_XY()
34659 {
34660 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XY
34661
34662 // Untyped buffer store 2 dwords with format conversion.
34663 void
34664 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
34665 {
34666 panicUnimplemented();
34667 }
34668
34669 void
34670 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
34671 {
34672 } // initiateAcc
34673
34674 void
34675 Inst_MUBUF__BUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
34676 {
34677 }
34678
34679 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
34680 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ(InFmt_MUBUF *iFmt)
34681 : Inst_MUBUF(iFmt, "buffer_store_format_xyz")
34682 {
34683 setFlag(MemoryRef);
34684 setFlag(Store);
34685 setFlag(GlobalSegment);
34686 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
34687
34688 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ()
34689 {
34690 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ
34691
34692 // Untyped buffer store 3 dwords with format conversion.
34693 void
34694 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
34695 {
34696 panicUnimplemented();
34697 }
34698
34699 void
34700 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
34701 {
34702 } // initiateAcc
34703
34704 void
34705 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
34706 {
34707 }
34708
34709 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34710 ::Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW(InFmt_MUBUF *iFmt)
34711 : Inst_MUBUF(iFmt, "buffer_store_format_xyzw")
34712 {
34713 setFlag(MemoryRef);
34714 setFlag(Store);
34715 setFlag(GlobalSegment);
34716 } // Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34717
34718 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34719 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW()
34720 {
34721 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW
34722
34723 // Untyped buffer store 4 dwords with format conversion.
34724 void
34725 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
34726 {
34727 panicUnimplemented();
34728 }
34729
34730 void
34731 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
34732 {
34733 } // initiateAcc
34734
34735 void
34736 Inst_MUBUF__BUFFER_STORE_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
34737 {
34738 }
34739
34740 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34741 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X(InFmt_MUBUF *iFmt)
34742 : Inst_MUBUF(iFmt, "buffer_load_format_d16_x")
34743 {
34744 setFlag(MemoryRef);
34745 setFlag(Load);
34746 setFlag(GlobalSegment);
34747 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34748
34749 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34750 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X()
34751 {
34752 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X
34753
34754 // Untyped buffer load 1 dword with format conversion.
34755 void
34756 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
34757 {
34758 panicUnimplemented();
34759 }
34760
34761 void
34762 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::initiateAcc(GPUDynInstPtr gpuDynInst)
34763 {
34764 } // initiateAcc
34765
34766 void
34767 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_X::completeAcc(GPUDynInstPtr gpuDynInst)
34768 {
34769 }
34770
34771 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34772 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
34773 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xy")
34774 {
34775 setFlag(MemoryRef);
34776 setFlag(Load);
34777 setFlag(GlobalSegment);
34778 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34779
34780 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34781 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY()
34782 {
34783 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY
34784
34785 // Untyped buffer load 2 dwords with format conversion.
34786 void
34787 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
34788 {
34789 panicUnimplemented();
34790 }
34791
34792 void
34793 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
34794 GPUDynInstPtr gpuDynInst)
34795 {
34796 } // initiateAcc
34797
34798 void
34799 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XY::completeAcc(
34800 GPUDynInstPtr gpuDynInst)
34801 {
34802 }
34803
34804 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34805 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
34806 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyz")
34807 {
34808 setFlag(MemoryRef);
34809 setFlag(Load);
34810 setFlag(GlobalSegment);
34811 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34812
34813 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34814 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ()
34815 {
34816 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ
34817
34818 // Untyped buffer load 3 dwords with format conversion.
34819 void
34820 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
34821 {
34822 panicUnimplemented();
34823 }
34824
34825 void
34826 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
34827 GPUDynInstPtr gpuDynInst)
34828 {
34829 } // initiateAcc
34830
34831 void
34832 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
34833 GPUDynInstPtr gpuDynInst)
34834 {
34835 }
34836
34837 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34838 ::Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
34839 : Inst_MUBUF(iFmt, "buffer_load_format_d16_xyzw")
34840 {
34841 setFlag(MemoryRef);
34842 setFlag(Load);
34843 setFlag(GlobalSegment);
34844 } // Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34845
34846 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34847 ::~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW()
34848 {
34849 } // ~Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW
34850
34851 // Untyped buffer load 4 dwords with format conversion.
34852 void
34853 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
34854 {
34855 panicUnimplemented();
34856 }
34857
34858 void
34859 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
34860 GPUDynInstPtr gpuDynInst)
34861 {
34862 } // initiateAcc
34863
34864 void
34865 Inst_MUBUF__BUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
34866 GPUDynInstPtr gpuDynInst)
34867 {
34868 }
34869
34870 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34871 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X(InFmt_MUBUF *iFmt)
34872 : Inst_MUBUF(iFmt, "buffer_store_format_d16_x")
34873 {
34874 setFlag(MemoryRef);
34875 setFlag(Store);
34876 setFlag(GlobalSegment);
34877 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34878
34879 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34880 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X()
34881 {
34882 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X
34883
34884 // Untyped buffer store 1 dword with format conversion.
34885 void
34886 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
34887 {
34888 panicUnimplemented();
34889 }
34890
34891 void
34892 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::initiateAcc(
34893 GPUDynInstPtr gpuDynInst)
34894 {
34895 } // initiateAcc
34896
34897 void
34898 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_X::completeAcc(
34899 GPUDynInstPtr gpuDynInst)
34900 {
34901 }
34902
34903 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34904 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY(InFmt_MUBUF *iFmt)
34905 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xy")
34906 {
34907 setFlag(MemoryRef);
34908 setFlag(Store);
34909 setFlag(GlobalSegment);
34910 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34911
34912 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34913 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY()
34914 {
34915 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY
34916
34917 // Untyped buffer store 2 dwords with format conversion.
34918 void
34919 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
34920 {
34921 panicUnimplemented();
34922 }
34923
34924 void
34925 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::initiateAcc(
34926 GPUDynInstPtr gpuDynInst)
34927 {
34928 } // initiateAcc
34929
34930 void
34931 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XY::completeAcc(
34932 GPUDynInstPtr gpuDynInst)
34933 {
34934 }
34935
34936 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34937 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ(InFmt_MUBUF *iFmt)
34938 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyz")
34939 {
34940 setFlag(MemoryRef);
34941 setFlag(Store);
34942 setFlag(GlobalSegment);
34943 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34944
34945 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34946 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ()
34947 {
34948 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ
34949
34950 // Untyped buffer store 3 dwords with format conversion.
34951 void
34952 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
34953 {
34954 panicUnimplemented();
34955 }
34956
34957 void
34958 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
34959 GPUDynInstPtr gpuDynInst)
34960 {
34961 } // initiateAcc
34962
34963 void
34964 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
34965 GPUDynInstPtr gpuDynInst)
34966 {
34967 }
34968
34969 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34970 ::Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW(InFmt_MUBUF *iFmt)
34971 : Inst_MUBUF(iFmt, "buffer_store_format_d16_xyzw")
34972 {
34973 setFlag(MemoryRef);
34974 setFlag(Store);
34975 setFlag(GlobalSegment);
34976 } // Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34977
34978 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34979 ::~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW()
34980 {
34981 } // ~Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW
34982
34983 // Untyped buffer store 4 dwords with format conversion.
34984 void
34985 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
34986 {
34987 panicUnimplemented();
34988 }
34989
34990 void
34991 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
34992 GPUDynInstPtr gpuDynInst)
34993 {
34994 } // initiateAcc
34995
34996 void
34997 Inst_MUBUF__BUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
34998 GPUDynInstPtr gpuDynInst)
34999 {
35000 }
35001
35002 Inst_MUBUF__BUFFER_LOAD_UBYTE
35003 ::Inst_MUBUF__BUFFER_LOAD_UBYTE(InFmt_MUBUF *iFmt)
35004 : Inst_MUBUF(iFmt, "buffer_load_ubyte")
35005 {
35006 setFlag(MemoryRef);
35007 setFlag(Load);
35008 if (instData.LDS) {
35009 setFlag(GroupSegment);
35010 } else {
35011 setFlag(GlobalSegment);
35012 }
35013 } // Inst_MUBUF__BUFFER_LOAD_UBYTE
35014
35015 Inst_MUBUF__BUFFER_LOAD_UBYTE::~Inst_MUBUF__BUFFER_LOAD_UBYTE()
35016 {
35017 } // ~Inst_MUBUF__BUFFER_LOAD_UBYTE
35018
35019 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
35020 void
35021 Inst_MUBUF__BUFFER_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
35022 {
35023 Wavefront *wf = gpuDynInst->wavefront();
35024 gpuDynInst->execUnitId = wf->execUnitId;
35025 gpuDynInst->exec_mask = wf->execMask();
35026 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35027 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35028
35029 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35030 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
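        // SRSRC is encoded in units of four SGPRs, hence the * 4 below to
        // reach the first register of the 128-bit buffer resource (V#).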
35031 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35032 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35033
35034 rsrcDesc.read();
35035 offset.read();
35036
35037 int inst_offset = instData.OFFSET;
35038
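        // OFFEN puts a per-lane byte offset in VADDR and IDXEN a per-lane
        // index; with both set the index comes first, i.e. VADDR holds the
        // index and VADDR+1 the offset. calcAddr() wants the offset operand
        // before the index operand, which is why the arguments swap to
        // (addr1, addr0) as soon as IDXEN is set; only VGPRs that are
        // actually consumed get read.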
35039 if (!instData.IDXEN && !instData.OFFEN) {
35040 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35041 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35042 addr0, addr1, rsrcDesc, offset, inst_offset);
35043 } else if (!instData.IDXEN && instData.OFFEN) {
35044 addr0.read();
35045 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35046 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35047 addr0, addr1, rsrcDesc, offset, inst_offset);
35048 } else if (instData.IDXEN && !instData.OFFEN) {
35049 addr0.read();
35050 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35051 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35052 addr1, addr0, rsrcDesc, offset, inst_offset);
35053 } else {
35054 addr0.read();
35055 addr1.read();
35056 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35057 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35058 addr1, addr0, rsrcDesc, offset, inst_offset);
35059 }
35060
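        // Hand the request to the LDS or global memory pipeline and move
        // it from the wavefront's "in pipe" count to its outstanding
        // count; validateRequestCounters() then sanity-checks the totals.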
35061 if (isLocalMem()) {
35062 gpuDynInst->computeUnit()->localMemoryPipe
35063 .issueRequest(gpuDynInst);
35064 wf->rdLmReqsInPipe--;
35065 wf->outstandingReqsRdLm++;
35066 } else {
35067 gpuDynInst->computeUnit()->globalMemoryPipe
35068 .issueRequest(gpuDynInst);
35069 wf->rdGmReqsInPipe--;
35070 wf->outstandingReqsRdGm++;
35071 }
35072
35073 wf->outstandingReqs++;
35074 wf->validateRequestCounters();
35075 }
35076
35077 void
35078 Inst_MUBUF__BUFFER_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
35079 {
35080 initMemRead<VecElemU8>(gpuDynInst);
35081 } // initiateAcc
35082
35083 void
35084 Inst_MUBUF__BUFFER_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
35085 {
35086 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
35087
35088 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35089 if (gpuDynInst->exec_mask[lane]) {
35090 if (!oobMask[lane]) {
35091 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
35092 gpuDynInst->d_data))[lane]);
35093 } else {
35094 vdst[lane] = 0;
35095 }
35096 }
35097 }
35098
35099 vdst.write();
35100 }
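    // Editor's note: completeAcc() above zero extends each returned byte
    // into its 32-bit VGPR element (0xff becomes 0x000000ff), and lanes
    // flagged out of bounds in oobMask by the address calculation are
    // forced to 0 instead of whatever the buffer returned.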
35101
35102
35103 Inst_MUBUF__BUFFER_LOAD_SBYTE
35104 ::Inst_MUBUF__BUFFER_LOAD_SBYTE(InFmt_MUBUF *iFmt)
35105 : Inst_MUBUF(iFmt, "buffer_load_sbyte")
35106 {
35107 setFlag(MemoryRef);
35108 setFlag(Load);
35109 setFlag(GlobalSegment);
35110 } // Inst_MUBUF__BUFFER_LOAD_SBYTE
35111
35112 Inst_MUBUF__BUFFER_LOAD_SBYTE::~Inst_MUBUF__BUFFER_LOAD_SBYTE()
35113 {
35114 } // ~Inst_MUBUF__BUFFER_LOAD_SBYTE
35115
35116 // Untyped buffer load signed byte (sign extend to VGPR destination).
35117 void
35118 Inst_MUBUF__BUFFER_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
35119 {
35120 panicUnimplemented();
35121 }
35122
35123 void
35124 Inst_MUBUF__BUFFER_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
35125 {
35126 } // initiateAcc
35127
35128 void
35129 Inst_MUBUF__BUFFER_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
35130 {
35131 }
35132
35133 Inst_MUBUF__BUFFER_LOAD_USHORT
35134 ::Inst_MUBUF__BUFFER_LOAD_USHORT(InFmt_MUBUF *iFmt)
35135 : Inst_MUBUF(iFmt, "buffer_load_ushort")
35136 {
35137 setFlag(MemoryRef);
35138 setFlag(Load);
35139 if (instData.LDS) {
35140 setFlag(GroupSegment);
35141 } else {
35142 setFlag(GlobalSegment);
35143 }
35144 } // Inst_MUBUF__BUFFER_LOAD_USHORT
35145
35146 Inst_MUBUF__BUFFER_LOAD_USHORT::~Inst_MUBUF__BUFFER_LOAD_USHORT()
35147 {
35148 } // ~Inst_MUBUF__BUFFER_LOAD_USHORT
35149
35150 // Untyped buffer load unsigned short (zero extend to VGPR destination).
35151 void
35152 Inst_MUBUF__BUFFER_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
35153 {
35154 Wavefront *wf = gpuDynInst->wavefront();
35155 gpuDynInst->execUnitId = wf->execUnitId;
35156 gpuDynInst->exec_mask = wf->execMask();
35157 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35158 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35159
35160 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35161 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35162 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35163 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35164
35165 rsrcDesc.read();
35166 offset.read();
35167
35168 int inst_offset = instData.OFFSET;
35169
35170 if (!instData.IDXEN && !instData.OFFEN) {
35171 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35172 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35173 addr0, addr1, rsrcDesc, offset, inst_offset);
35174 } else if (!instData.IDXEN && instData.OFFEN) {
35175 addr0.read();
35176 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35177 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35178 addr0, addr1, rsrcDesc, offset, inst_offset);
35179 } else if (instData.IDXEN && !instData.OFFEN) {
35180 addr0.read();
35181 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35182 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35183 addr1, addr0, rsrcDesc, offset, inst_offset);
35184 } else {
35185 addr0.read();
35186 addr1.read();
35187 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35188 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35189 addr1, addr0, rsrcDesc, offset, inst_offset);
35190 }
35191
35192 if (isLocalMem()) {
35193 gpuDynInst->computeUnit()->localMemoryPipe
35194 .issueRequest(gpuDynInst);
35195 wf->rdLmReqsInPipe--;
35196 wf->outstandingReqsRdLm++;
35197 } else {
35198 gpuDynInst->computeUnit()->globalMemoryPipe
35199 .issueRequest(gpuDynInst);
35200 wf->rdGmReqsInPipe--;
35201 wf->outstandingReqsRdGm++;
35202 }
35203
35204 wf->outstandingReqs++;
35205 wf->validateRequestCounters();
35206 }
35207
35208 void
35209 Inst_MUBUF__BUFFER_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
35210 {
35211 initMemRead<VecElemU16>(gpuDynInst);
35212 } // initiateAcc
35213
35214 void
35215 Inst_MUBUF__BUFFER_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
35216 {
35217 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
35218
35219 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35220 if (gpuDynInst->exec_mask[lane]) {
35221 if (!oobMask[lane]) {
35222 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
35223 gpuDynInst->d_data))[lane]);
35224 } else {
35225 vdst[lane] = 0;
35226 }
35227 }
35228 }
35229
35230 vdst.write();
35231 }
35232
35233
35234 Inst_MUBUF__BUFFER_LOAD_SSHORT
35235 ::Inst_MUBUF__BUFFER_LOAD_SSHORT(InFmt_MUBUF *iFmt)
35236 : Inst_MUBUF(iFmt, "buffer_load_sshort")
35237 {
35238 setFlag(MemoryRef);
35239 setFlag(Load);
35240 setFlag(GlobalSegment);
35241 } // Inst_MUBUF__BUFFER_LOAD_SSHORT
35242
35243 Inst_MUBUF__BUFFER_LOAD_SSHORT::~Inst_MUBUF__BUFFER_LOAD_SSHORT()
35244 {
35245 } // ~Inst_MUBUF__BUFFER_LOAD_SSHORT
35246
35247 // Untyped buffer load signed short (sign extend to VGPR destination).
35248 void
35249 Inst_MUBUF__BUFFER_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
35250 {
35251 panicUnimplemented();
35252 }
35253
35254 void
35255 Inst_MUBUF__BUFFER_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
35256 {
35257 } // initiateAcc
35258
35259 void
35260 Inst_MUBUF__BUFFER_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
35261 {
35262 }
35263
35264 Inst_MUBUF__BUFFER_LOAD_DWORD
35265 ::Inst_MUBUF__BUFFER_LOAD_DWORD(InFmt_MUBUF *iFmt)
35266 : Inst_MUBUF(iFmt, "buffer_load_dword")
35267 {
35268 setFlag(MemoryRef);
35269 setFlag(Load);
35270 if (instData.LDS) {
35271 setFlag(GroupSegment);
35272 } else {
35273 setFlag(GlobalSegment);
35274 }
35275 } // Inst_MUBUF__BUFFER_LOAD_DWORD
35276
35277 Inst_MUBUF__BUFFER_LOAD_DWORD::~Inst_MUBUF__BUFFER_LOAD_DWORD()
35278 {
35279 } // ~Inst_MUBUF__BUFFER_LOAD_DWORD
35280
35281 // Untyped buffer load dword.
35282 void
35283 Inst_MUBUF__BUFFER_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
35284 {
35285 Wavefront *wf = gpuDynInst->wavefront();
35286 gpuDynInst->execUnitId = wf->execUnitId;
35287 gpuDynInst->exec_mask = wf->execMask();
35288 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35289 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35290
35291 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35292 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35293 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35294 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35295
35296 rsrcDesc.read();
35297 offset.read();
35298
35299 int inst_offset = instData.OFFSET;
35300
35301 if (!instData.IDXEN && !instData.OFFEN) {
35302 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35303 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35304 addr0, addr1, rsrcDesc, offset, inst_offset);
35305 } else if (!instData.IDXEN && instData.OFFEN) {
35306 addr0.read();
35307 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35308 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35309 addr0, addr1, rsrcDesc, offset, inst_offset);
35310 } else if (instData.IDXEN && !instData.OFFEN) {
35311 addr0.read();
35312 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35313 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35314 addr1, addr0, rsrcDesc, offset, inst_offset);
35315 } else {
35316 addr0.read();
35317 addr1.read();
35318 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35319 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35320 addr1, addr0, rsrcDesc, offset, inst_offset);
35321 }
35322
35323 if (isLocalMem()) {
35324 gpuDynInst->computeUnit()->localMemoryPipe
35325 .issueRequest(gpuDynInst);
35326 wf->rdLmReqsInPipe--;
35327 wf->outstandingReqsRdLm++;
35328 } else {
35329 gpuDynInst->computeUnit()->globalMemoryPipe
35330 .issueRequest(gpuDynInst);
35331 wf->rdGmReqsInPipe--;
35332 wf->outstandingReqsRdGm++;
35333 }
35334
35335 wf->outstandingReqs++;
35336 wf->validateRequestCounters();
35337 }
35338
35339 void
35340 Inst_MUBUF__BUFFER_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
35341 {
35342 initMemRead<VecElemU32>(gpuDynInst);
35343 } // initiateAcc
35344
35345 void
35346 Inst_MUBUF__BUFFER_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
35347 {
35348 VecOperandU32 vdst(gpuDynInst, extData.VDATA);
35349
35350 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35351 if (gpuDynInst->exec_mask[lane]) {
35352 if (!oobMask[lane]) {
35353 vdst[lane] = (reinterpret_cast<VecElemU32*>(
35354 gpuDynInst->d_data))[lane];
35355 } else {
35356 vdst[lane] = 0;
35357 }
35358 }
35359 }
35360
35361 vdst.write();
35362 } // completeAcc
35363
35364 Inst_MUBUF__BUFFER_LOAD_DWORDX2
35365 ::Inst_MUBUF__BUFFER_LOAD_DWORDX2(InFmt_MUBUF *iFmt)
35366 : Inst_MUBUF(iFmt, "buffer_load_dwordx2")
35367 {
35368 setFlag(MemoryRef);
35369 setFlag(Load);
35370 if (instData.LDS) {
35371 setFlag(GroupSegment);
35372 } else {
35373 setFlag(GlobalSegment);
35374 }
35375 } // Inst_MUBUF__BUFFER_LOAD_DWORDX2
35376
35377 Inst_MUBUF__BUFFER_LOAD_DWORDX2::~Inst_MUBUF__BUFFER_LOAD_DWORDX2()
35378 {
35379 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX2
35380
35381 // Untyped buffer load 2 dwords.
35382 void
35383 Inst_MUBUF__BUFFER_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
35384 {
35385 Wavefront *wf = gpuDynInst->wavefront();
35386 gpuDynInst->execUnitId = wf->execUnitId;
35387 gpuDynInst->exec_mask = wf->execMask();
35388 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35389 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35390
35391 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35392 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35393 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35394 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35395
35396 rsrcDesc.read();
35397 offset.read();
35398
35399 int inst_offset = instData.OFFSET;
35400
35401 if (!instData.IDXEN && !instData.OFFEN) {
35402 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35403 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35404 addr0, addr1, rsrcDesc, offset, inst_offset);
35405 } else if (!instData.IDXEN && instData.OFFEN) {
35406 addr0.read();
35407 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35408 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35409 addr0, addr1, rsrcDesc, offset, inst_offset);
35410 } else if (instData.IDXEN && !instData.OFFEN) {
35411 addr0.read();
35412 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35413 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35414 addr1, addr0, rsrcDesc, offset, inst_offset);
35415 } else {
35416 addr0.read();
35417 addr1.read();
35418 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35419 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35420 addr1, addr0, rsrcDesc, offset, inst_offset);
35421 }
35422
35423 if (isLocalMem()) {
35424 gpuDynInst->computeUnit()->localMemoryPipe
35425 .issueRequest(gpuDynInst);
35426 wf->rdLmReqsInPipe--;
35427 wf->outstandingReqsRdLm++;
35428 } else {
35429 gpuDynInst->computeUnit()->globalMemoryPipe
35430 .issueRequest(gpuDynInst);
35431 wf->rdGmReqsInPipe--;
35432 wf->outstandingReqsRdGm++;
35433 }
35434
35435 wf->outstandingReqs++;
35436 wf->validateRequestCounters();
35437 } // execute
35438
35439 void
35440 Inst_MUBUF__BUFFER_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
35441 {
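        // Here the template argument is the number of dwords fetched per
        // lane, not an element type as in the narrower loads above.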
35442 initMemRead<2>(gpuDynInst);
35443 } // initiateAcc
35444
35445 void
35446 Inst_MUBUF__BUFFER_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
35447 {
35448 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
35449 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
35450
35451 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35452 if (gpuDynInst->exec_mask[lane]) {
35453 if (!oobMask[lane]) {
35454 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
35455 gpuDynInst->d_data))[lane * 2];
35456 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
35457 gpuDynInst->d_data))[lane * 2 + 1];
35458 } else {
35459 vdst0[lane] = 0;
35460 vdst1[lane] = 0;
35461 }
35462 }
35463 }
35464
35465 vdst0.write();
35466 vdst1.write();
35467 } // completeAcc
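    // Editor's note: the dwordx2/x3/x4 completeAcc() loops assume d_data
    // is laid out lane-major, the N dwords of a lane sitting contiguously
    // at index lane * N + i; for dwordx2, lane 0 owns indices {0, 1},
    // lane 1 owns {2, 3}, and so on.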
35468
35469 Inst_MUBUF__BUFFER_LOAD_DWORDX3
35470 ::Inst_MUBUF__BUFFER_LOAD_DWORDX3(InFmt_MUBUF *iFmt)
35471 : Inst_MUBUF(iFmt, "buffer_load_dwordx3")
35472 {
35473 setFlag(MemoryRef);
35474 setFlag(Load);
35475 if (instData.LDS) {
35476 setFlag(GroupSegment);
35477 } else {
35478 setFlag(GlobalSegment);
35479 }
35480 } // Inst_MUBUF__BUFFER_LOAD_DWORDX3
35481
35482 Inst_MUBUF__BUFFER_LOAD_DWORDX3::~Inst_MUBUF__BUFFER_LOAD_DWORDX3()
35483 {
35484 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX3
35485
35486 // Untyped buffer load 3 dwords.
35487 void
35488 Inst_MUBUF__BUFFER_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
35489 {
35490 Wavefront *wf = gpuDynInst->wavefront();
35491 gpuDynInst->execUnitId = wf->execUnitId;
35492 gpuDynInst->exec_mask = wf->execMask();
35493 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35494 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35495
35496 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35497 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35498 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35499 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35500
35501 rsrcDesc.read();
35502 offset.read();
35503
35504 int inst_offset = instData.OFFSET;
35505
35506 if (!instData.IDXEN && !instData.OFFEN) {
35507 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35508 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35509 addr0, addr1, rsrcDesc, offset, inst_offset);
35510 } else if (!instData.IDXEN && instData.OFFEN) {
35511 addr0.read();
35512 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35513 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35514 addr0, addr1, rsrcDesc, offset, inst_offset);
35515 } else if (instData.IDXEN && !instData.OFFEN) {
35516 addr0.read();
35517 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35518 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35519 addr1, addr0, rsrcDesc, offset, inst_offset);
35520 } else {
35521 addr0.read();
35522 addr1.read();
35523 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35524 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35525 addr1, addr0, rsrcDesc, offset, inst_offset);
35526 }
35527
35528 if (isLocalMem()) {
35529 gpuDynInst->computeUnit()->localMemoryPipe
35530 .issueRequest(gpuDynInst);
35531 wf->rdLmReqsInPipe--;
35532 wf->outstandingReqsRdLm++;
35533 } else {
35534 gpuDynInst->computeUnit()->globalMemoryPipe
35535 .issueRequest(gpuDynInst);
35536 wf->rdGmReqsInPipe--;
35537 wf->outstandingReqsRdGm++;
35538 }
35539
35540 wf->outstandingReqs++;
35541 wf->validateRequestCounters();
35542 } // execute
35543
35544 void
35545 Inst_MUBUF__BUFFER_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
35546 {
35547 initMemRead<3>(gpuDynInst);
35548 } // initiateAcc
35549
35550 void
35551 Inst_MUBUF__BUFFER_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
35552 {
35553 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
35554 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
35555 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
35556
35557 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35558 if (gpuDynInst->exec_mask[lane]) {
35559 if (!oobMask[lane]) {
35560 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
35561 gpuDynInst->d_data))[lane * 3];
35562 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
35563 gpuDynInst->d_data))[lane * 3 + 1];
35564 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
35565 gpuDynInst->d_data))[lane * 3 + 2];
35566 } else {
35567 vdst0[lane] = 0;
35568 vdst1[lane] = 0;
35569 vdst2[lane] = 0;
35570 }
35571 }
35572 }
35573
35574 vdst0.write();
35575 vdst1.write();
35576 vdst2.write();
35577 } // completeAcc
35578
35579 Inst_MUBUF__BUFFER_LOAD_DWORDX4
35580 ::Inst_MUBUF__BUFFER_LOAD_DWORDX4(InFmt_MUBUF *iFmt)
35581 : Inst_MUBUF(iFmt, "buffer_load_dwordx4")
35582 {
35583 setFlag(MemoryRef);
35584 setFlag(Load);
35585 if (instData.LDS) {
35586 setFlag(GroupSegment);
35587 } else {
35588 setFlag(GlobalSegment);
35589 }
35590 } // Inst_MUBUF__BUFFER_LOAD_DWORDX4
35591
35592 Inst_MUBUF__BUFFER_LOAD_DWORDX4::~Inst_MUBUF__BUFFER_LOAD_DWORDX4()
35593 {
35594 } // ~Inst_MUBUF__BUFFER_LOAD_DWORDX4
35595
35596 // Untyped buffer load 4 dwords.
35597 void
35598 Inst_MUBUF__BUFFER_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
35599 {
35600 Wavefront *wf = gpuDynInst->wavefront();
35601 gpuDynInst->execUnitId = wf->execUnitId;
35602 gpuDynInst->exec_mask = wf->execMask();
35603 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35604 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35605
35606 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35607 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35608 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35609 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35610
35611 rsrcDesc.read();
35612 offset.read();
35613
35614 int inst_offset = instData.OFFSET;
35615
35616 if (!instData.IDXEN && !instData.OFFEN) {
35617 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35618 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35619 addr0, addr1, rsrcDesc, offset, inst_offset);
35620 } else if (!instData.IDXEN && instData.OFFEN) {
35621 addr0.read();
35622 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35623 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35624 addr0, addr1, rsrcDesc, offset, inst_offset);
35625 } else if (instData.IDXEN && !instData.OFFEN) {
35626 addr0.read();
35627 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35628 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35629 addr1, addr0, rsrcDesc, offset, inst_offset);
35630 } else {
35631 addr0.read();
35632 addr1.read();
35633 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35634 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35635 addr1, addr0, rsrcDesc, offset, inst_offset);
35636 }
35637
35638 if (isLocalMem()) {
35639 gpuDynInst->computeUnit()->localMemoryPipe
35640 .issueRequest(gpuDynInst);
35641 wf->rdLmReqsInPipe--;
35642 wf->outstandingReqsRdLm++;
35643 } else {
35644 gpuDynInst->computeUnit()->globalMemoryPipe
35645 .issueRequest(gpuDynInst);
35646 wf->rdGmReqsInPipe--;
35647 wf->outstandingReqsRdGm++;
35648 }
35649
35650 wf->outstandingReqs++;
35651 wf->validateRequestCounters();
35652 } // execute
35653
35654 void
35655 Inst_MUBUF__BUFFER_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
35656 {
35657 initMemRead<4>(gpuDynInst);
35658 } // initiateAcc
35659
35660 void
35661 Inst_MUBUF__BUFFER_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
35662 {
35663 VecOperandU32 vdst0(gpuDynInst, extData.VDATA);
35664 VecOperandU32 vdst1(gpuDynInst, extData.VDATA + 1);
35665 VecOperandU32 vdst2(gpuDynInst, extData.VDATA + 2);
35666 VecOperandU32 vdst3(gpuDynInst, extData.VDATA + 3);
35667
35668 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35669 if (gpuDynInst->exec_mask[lane]) {
35670 if (!oobMask[lane]) {
35671 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
35672 gpuDynInst->d_data))[lane * 4];
35673 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
35674 gpuDynInst->d_data))[lane * 4 + 1];
35675 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
35676 gpuDynInst->d_data))[lane * 4 + 2];
35677 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
35678 gpuDynInst->d_data))[lane * 4 + 3];
35679 } else {
35680 vdst0[lane] = 0;
35681 vdst1[lane] = 0;
35682 vdst2[lane] = 0;
35683 vdst3[lane] = 0;
35684 }
35685 }
35686 }
35687
35688 vdst0.write();
35689 vdst1.write();
35690 vdst2.write();
35691 vdst3.write();
35692 } // completeAcc
35693
35694 Inst_MUBUF__BUFFER_STORE_BYTE
35695 ::Inst_MUBUF__BUFFER_STORE_BYTE(InFmt_MUBUF *iFmt)
35696 : Inst_MUBUF(iFmt, "buffer_store_byte")
35697 {
35698 setFlag(MemoryRef);
35699 setFlag(Store);
35700 if (instData.LDS) {
35701 setFlag(GroupSegment);
35702 } else {
35703 setFlag(GlobalSegment);
35704 }
35705 } // Inst_MUBUF__BUFFER_STORE_BYTE
35706
35707 Inst_MUBUF__BUFFER_STORE_BYTE::~Inst_MUBUF__BUFFER_STORE_BYTE()
35708 {
35709 } // ~Inst_MUBUF__BUFFER_STORE_BYTE
35710
35711 // Untyped buffer store byte.
35712 void
35713 Inst_MUBUF__BUFFER_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
35714 {
35715 Wavefront *wf = gpuDynInst->wavefront();
35716 gpuDynInst->execUnitId = wf->execUnitId;
35717 gpuDynInst->exec_mask = wf->execMask();
35718 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35719 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35720
35721 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35722 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35723 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35724 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35725
35726 rsrcDesc.read();
35727 offset.read();
35728
35729 int inst_offset = instData.OFFSET;
35730
35731 if (!instData.IDXEN && !instData.OFFEN) {
35732 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35733 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35734 addr0, addr1, rsrcDesc, offset, inst_offset);
35735 } else if (!instData.IDXEN && instData.OFFEN) {
35736 addr0.read();
35737 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35738 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35739 addr0, addr1, rsrcDesc, offset, inst_offset);
35740 } else if (instData.IDXEN && !instData.OFFEN) {
35741 addr0.read();
35742 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35743 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35744 addr1, addr0, rsrcDesc, offset, inst_offset);
35745 } else {
35746 addr0.read();
35747 addr1.read();
35748 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35749 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35750 addr1, addr0, rsrcDesc, offset, inst_offset);
35751 }
35752
35753 if (isLocalMem()) {
35754 gpuDynInst->computeUnit()->localMemoryPipe
35755 .issueRequest(gpuDynInst);
35756 wf->wrLmReqsInPipe--;
35757 wf->outstandingReqsWrLm++;
35758 } else {
35759 gpuDynInst->computeUnit()->globalMemoryPipe
35760 .issueRequest(gpuDynInst);
35761 wf->wrGmReqsInPipe--;
35762 wf->outstandingReqsWrGm++;
35763 }
35764
35765 wf->outstandingReqs++;
35766 wf->validateRequestCounters();
35767 }
35768
35769 void
35770 Inst_MUBUF__BUFFER_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
35771 {
35772 ConstVecOperandI8 data(gpuDynInst, extData.VDATA);
35773 data.read();
35774
35775 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35776 if (gpuDynInst->exec_mask[lane]) {
35777 (reinterpret_cast<VecElemI8*>(gpuDynInst->d_data))[lane]
35778 = data[lane];
35779 }
35780 }
35781
35782 initMemWrite<VecElemI8>(gpuDynInst);
35783 } // initiateAcc
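    // Editor's note: stores mirror the load path in reverse; active lanes
    // are first gathered from the VGPR into the d_data staging buffer, and
    // only then does initMemWrite<T>() issue the write, so inactive lanes
    // contribute nothing to it.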
35784
35785 void
35786 Inst_MUBUF__BUFFER_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
35787 {
35788 }
35789
35790 Inst_MUBUF__BUFFER_STORE_SHORT
35791 ::Inst_MUBUF__BUFFER_STORE_SHORT(InFmt_MUBUF *iFmt)
35792 : Inst_MUBUF(iFmt, "buffer_store_short")
35793 {
35794 setFlag(MemoryRef);
35795 setFlag(Store);
35796 if (instData.LDS) {
35797 setFlag(GroupSegment);
35798 } else {
35799 setFlag(GlobalSegment);
35800 }
35801 } // Inst_MUBUF__BUFFER_STORE_SHORT
35802
35803 Inst_MUBUF__BUFFER_STORE_SHORT::~Inst_MUBUF__BUFFER_STORE_SHORT()
35804 {
35805 } // ~Inst_MUBUF__BUFFER_STORE_SHORT
35806
35807 // Untyped buffer store short.
35808 void
35809 Inst_MUBUF__BUFFER_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
35810 {
35811 Wavefront *wf = gpuDynInst->wavefront();
35812 gpuDynInst->execUnitId = wf->execUnitId;
35813 gpuDynInst->exec_mask = wf->execMask();
35814 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35815 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35816
35817 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35818 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35819 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35820 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35821
35822 rsrcDesc.read();
35823 offset.read();
35824
35825 int inst_offset = instData.OFFSET;
35826
35827 if (!instData.IDXEN && !instData.OFFEN) {
35828 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35829 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35830 addr0, addr1, rsrcDesc, offset, inst_offset);
35831 } else if (!instData.IDXEN && instData.OFFEN) {
35832 addr0.read();
35833 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35834 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35835 addr0, addr1, rsrcDesc, offset, inst_offset);
35836 } else if (instData.IDXEN && !instData.OFFEN) {
35837 addr0.read();
35838 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35839 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35840 addr1, addr0, rsrcDesc, offset, inst_offset);
35841 } else {
35842 addr0.read();
35843 addr1.read();
35844 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35845 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35846 addr1, addr0, rsrcDesc, offset, inst_offset);
35847 }
35848
35849 if (isLocalMem()) {
35850 gpuDynInst->computeUnit()->localMemoryPipe
35851 .issueRequest(gpuDynInst);
35852 wf->wrLmReqsInPipe--;
35853 wf->outstandingReqsWrLm++;
35854 } else {
35855 gpuDynInst->computeUnit()->globalMemoryPipe
35856 .issueRequest(gpuDynInst);
35857 wf->wrGmReqsInPipe--;
35858 wf->outstandingReqsWrGm++;
35859 }
35860
35861 wf->outstandingReqs++;
35862 wf->validateRequestCounters();
35863 }
35864
35865 void
35866 Inst_MUBUF__BUFFER_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
35867 {
35868 ConstVecOperandI16 data(gpuDynInst, extData.VDATA);
35869 data.read();
35870
35871 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35872 if (gpuDynInst->exec_mask[lane]) {
35873 (reinterpret_cast<VecElemI16*>(gpuDynInst->d_data))[lane]
35874 = data[lane];
35875 }
35876 }
35877
35878 initMemWrite<VecElemI16>(gpuDynInst);
35879 } // initiateAcc
35880
35881 void
35882 Inst_MUBUF__BUFFER_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
35883 {
35884 }
35885
35886 Inst_MUBUF__BUFFER_STORE_DWORD::
35887 Inst_MUBUF__BUFFER_STORE_DWORD(InFmt_MUBUF *iFmt)
35888 : Inst_MUBUF(iFmt, "buffer_store_dword")
35889 {
35890 setFlag(MemoryRef);
35891 setFlag(Store);
35892 if (instData.LDS) {
35893 setFlag(GroupSegment);
35894 } else {
35895 setFlag(GlobalSegment);
35896 }
35897 } // Inst_MUBUF__BUFFER_STORE_DWORD
35898
35899 Inst_MUBUF__BUFFER_STORE_DWORD::~Inst_MUBUF__BUFFER_STORE_DWORD()
35900 {
35901 } // ~Inst_MUBUF__BUFFER_STORE_DWORD
35902
35903 // Untyped buffer store dword.
35904 void
35905 Inst_MUBUF__BUFFER_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
35906 {
35907 Wavefront *wf = gpuDynInst->wavefront();
35908 gpuDynInst->execUnitId = wf->execUnitId;
35909 gpuDynInst->exec_mask = wf->execMask();
35910 gpuDynInst->latency.init(gpuDynInst->computeUnit());
35911 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
35912
35913 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
35914 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
35915 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
35916 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
35917
35918 rsrcDesc.read();
35919 offset.read();
35920
35921 int inst_offset = instData.OFFSET;
35922
35923 if (!instData.IDXEN && !instData.OFFEN) {
35924 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35925 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35926 addr0, addr1, rsrcDesc, offset, inst_offset);
35927 } else if (!instData.IDXEN && instData.OFFEN) {
35928 addr0.read();
35929 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35930 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35931 addr0, addr1, rsrcDesc, offset, inst_offset);
35932 } else if (instData.IDXEN && !instData.OFFEN) {
35933 addr0.read();
35934 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35935 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35936 addr1, addr0, rsrcDesc, offset, inst_offset);
35937 } else {
35938 addr0.read();
35939 addr1.read();
35940 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
35941 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
35942 addr1, addr0, rsrcDesc, offset, inst_offset);
35943 }
35944
35945 if (isLocalMem()) {
35946 gpuDynInst->computeUnit()->localMemoryPipe
35947 .issueRequest(gpuDynInst);
35948 wf->wrLmReqsInPipe--;
35949 wf->outstandingReqsWrLm++;
35950 } else {
35951 gpuDynInst->computeUnit()->globalMemoryPipe
35952 .issueRequest(gpuDynInst);
35953 wf->wrGmReqsInPipe--;
35954 wf->outstandingReqsWrGm++;
35955 }
35956
35957 wf->outstandingReqs++;
35958 wf->validateRequestCounters();
35959 }
35960
35961 void
35962 Inst_MUBUF__BUFFER_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
35963 {
35964 ConstVecOperandU32 data(gpuDynInst, extData.VDATA);
35965 data.read();
35966
35967 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
35968 if (gpuDynInst->exec_mask[lane]) {
35969 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
35970 = data[lane];
35971 }
35972 }
35973
35974 initMemWrite<VecElemU32>(gpuDynInst);
35975 } // initiateAcc
35976
35977 void
35978 Inst_MUBUF__BUFFER_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
35979 {
35980 } // completeAcc
35981
35982 Inst_MUBUF__BUFFER_STORE_DWORDX2
35983 ::Inst_MUBUF__BUFFER_STORE_DWORDX2(InFmt_MUBUF *iFmt)
35984 : Inst_MUBUF(iFmt, "buffer_store_dwordx2")
35985 {
35986 setFlag(MemoryRef);
35987 setFlag(Store);
35988 if (instData.LDS) {
35989 setFlag(GroupSegment);
35990 } else {
35991 setFlag(GlobalSegment);
35992 }
35993 } // Inst_MUBUF__BUFFER_STORE_DWORDX2
35994
35995 Inst_MUBUF__BUFFER_STORE_DWORDX2::~Inst_MUBUF__BUFFER_STORE_DWORDX2()
35996 {
35997 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX2
35998
35999 // Untyped buffer store 2 dwords.
36000 void
36001 Inst_MUBUF__BUFFER_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
36002 {
36003 Wavefront *wf = gpuDynInst->wavefront();
36004 gpuDynInst->execUnitId = wf->execUnitId;
36005 gpuDynInst->exec_mask = wf->execMask();
36006 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36007 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36008
36009 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
36010 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
36011 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
36012 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
36013 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
36014 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
36015
36016 rsrcDesc.read();
36017 offset.read();
36018 data0.read();
36019 data1.read();
36020
36021 int inst_offset = instData.OFFSET;
36022
36023 if (!instData.IDXEN && !instData.OFFEN) {
36024 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36025 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36026 addr0, addr1, rsrcDesc, offset, inst_offset);
36027 } else if (!instData.IDXEN && instData.OFFEN) {
36028 addr0.read();
36029 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36030 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36031 addr0, addr1, rsrcDesc, offset, inst_offset);
36032 } else if (instData.IDXEN && !instData.OFFEN) {
36033 addr0.read();
36034 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36035 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36036 addr1, addr0, rsrcDesc, offset, inst_offset);
36037 } else {
36038 addr0.read();
36039 addr1.read();
36040 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36041 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36042 addr1, addr0, rsrcDesc, offset, inst_offset);
36043 }
36044
36045 if (isLocalMem()) {
36046 gpuDynInst->computeUnit()->localMemoryPipe
36047 .issueRequest(gpuDynInst);
36048 wf->wrLmReqsInPipe--;
36049 wf->outstandingReqsWrLm++;
36050 } else {
36051 gpuDynInst->computeUnit()->globalMemoryPipe
36052 .issueRequest(gpuDynInst);
36053 wf->wrGmReqsInPipe--;
36054 wf->outstandingReqsWrGm++;
36055 }
36056
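        // Stage the two dwords of each active lane contiguously (stride 2)
        // so the layout matches what initMemWrite<2>() writes out per lane.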
36057 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
36058 if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 2]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*2 + 1]
                    = data1[lane];
36063 }
36064 }
36065
36066 wf->outstandingReqs++;
36067 wf->validateRequestCounters();
36068 } // execute
36069
36070 void
36071 Inst_MUBUF__BUFFER_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
36072 {
36073 initMemWrite<2>(gpuDynInst);
36074 } // initiateAcc
36075
36076 void
36077 Inst_MUBUF__BUFFER_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
36078 {
36079 } // completeAcc
36080
36081 Inst_MUBUF__BUFFER_STORE_DWORDX3
36082 ::Inst_MUBUF__BUFFER_STORE_DWORDX3(InFmt_MUBUF *iFmt)
36083 : Inst_MUBUF(iFmt, "buffer_store_dwordx3")
36084 {
36085 setFlag(MemoryRef);
36086 setFlag(Store);
36087 if (instData.LDS) {
36088 setFlag(GroupSegment);
36089 } else {
36090 setFlag(GlobalSegment);
36091 }
36092 } // Inst_MUBUF__BUFFER_STORE_DWORDX3
36093
36094 Inst_MUBUF__BUFFER_STORE_DWORDX3::~Inst_MUBUF__BUFFER_STORE_DWORDX3()
36095 {
36096 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX3
36097
36098 // Untyped buffer store 3 dwords.
36099 void
36100 Inst_MUBUF__BUFFER_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
36101 {
36102 Wavefront *wf = gpuDynInst->wavefront();
36103 gpuDynInst->execUnitId = wf->execUnitId;
36104 gpuDynInst->exec_mask = wf->execMask();
36105 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36106 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36107
36108 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
36109 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
36110 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
36111 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
36112 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
36113 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
36114 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
36115
36116 rsrcDesc.read();
36117 offset.read();
36118 data0.read();
36119 data1.read();
36120 data2.read();
36121
36122 int inst_offset = instData.OFFSET;
36123
36124 if (!instData.IDXEN && !instData.OFFEN) {
36125 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36126 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36127 addr0, addr1, rsrcDesc, offset, inst_offset);
36128 } else if (!instData.IDXEN && instData.OFFEN) {
36129 addr0.read();
36130 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36131 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36132 addr0, addr1, rsrcDesc, offset, inst_offset);
36133 } else if (instData.IDXEN && !instData.OFFEN) {
36134 addr0.read();
36135 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36136 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36137 addr1, addr0, rsrcDesc, offset, inst_offset);
36138 } else {
36139 addr0.read();
36140 addr1.read();
36141 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36142 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36143 addr1, addr0, rsrcDesc, offset, inst_offset);
36144 }
36145
36146 if (isLocalMem()) {
36147 gpuDynInst->computeUnit()->localMemoryPipe
36148 .issueRequest(gpuDynInst);
36149 wf->wrLmReqsInPipe--;
36150 wf->outstandingReqsWrLm++;
36151 } else {
36152 gpuDynInst->computeUnit()->globalMemoryPipe
36153 .issueRequest(gpuDynInst);
36154 wf->wrGmReqsInPipe--;
36155 wf->outstandingReqsWrGm++;
36156 }
36157
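        // As with dwordx2 above, stage three dwords per active lane
        // (stride 3) to match initMemWrite<3>().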
36158 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
36159 if (gpuDynInst->exec_mask[lane]) {
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 3]
                    = data0[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 1]
                    = data1[lane];
                (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*3 + 2]
                    = data2[lane];
36166 }
36167 }
36168
36169 wf->outstandingReqs++;
36170 wf->validateRequestCounters();
36171 } // execute
36172
36173 void
36174 Inst_MUBUF__BUFFER_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
36175 {
36176 initMemWrite<3>(gpuDynInst);
36177 } // initiateAcc
36178
36179 void
36180 Inst_MUBUF__BUFFER_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
36181 {
36182 } // completeAcc
36183
36184 Inst_MUBUF__BUFFER_STORE_DWORDX4
36185 ::Inst_MUBUF__BUFFER_STORE_DWORDX4(InFmt_MUBUF *iFmt)
36186 : Inst_MUBUF(iFmt, "buffer_store_dwordx4")
36187 {
36188 setFlag(MemoryRef);
36189 setFlag(Store);
36190 if (instData.LDS) {
36191 setFlag(GroupSegment);
36192 } else {
36193 setFlag(GlobalSegment);
36194 }
36195 } // Inst_MUBUF__BUFFER_STORE_DWORDX4
36196
36197 Inst_MUBUF__BUFFER_STORE_DWORDX4::~Inst_MUBUF__BUFFER_STORE_DWORDX4()
36198 {
36199 } // ~Inst_MUBUF__BUFFER_STORE_DWORDX4
36200
36201 // Untyped buffer store 4 dwords.
36202 void
36203 Inst_MUBUF__BUFFER_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
36204 {
36205 Wavefront *wf = gpuDynInst->wavefront();
36206 gpuDynInst->execUnitId = wf->execUnitId;
36207 gpuDynInst->exec_mask = wf->execMask();
36208 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36209 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36210
36211 ConstVecOperandU32 addr0(gpuDynInst, extData.VADDR);
36212 ConstVecOperandU32 addr1(gpuDynInst, extData.VADDR + 1);
36213 ConstScalarOperandU128 rsrcDesc(gpuDynInst, extData.SRSRC * 4);
36214 ConstScalarOperandU32 offset(gpuDynInst, extData.SOFFSET);
36215 ConstVecOperandU32 data0(gpuDynInst, extData.VDATA);
36216 ConstVecOperandU32 data1(gpuDynInst, extData.VDATA + 1);
36217 ConstVecOperandU32 data2(gpuDynInst, extData.VDATA + 2);
36218 ConstVecOperandU32 data3(gpuDynInst, extData.VDATA + 3);
36219
36220 rsrcDesc.read();
36221 offset.read();
36222 data0.read();
36223 data1.read();
36224 data2.read();
36225 data3.read();
36226
36227 int inst_offset = instData.OFFSET;
36228
36229 if (!instData.IDXEN && !instData.OFFEN) {
36230 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36231 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36232 addr0, addr1, rsrcDesc, offset, inst_offset);
36233 } else if (!instData.IDXEN && instData.OFFEN) {
36234 addr0.read();
36235 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36236 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36237 addr0, addr1, rsrcDesc, offset, inst_offset);
36238 } else if (instData.IDXEN && !instData.OFFEN) {
36239 addr0.read();
36240 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36241 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36242 addr1, addr0, rsrcDesc, offset, inst_offset);
36243 } else {
36244 addr0.read();
36245 addr1.read();
36246 calcAddr<ConstVecOperandU32, ConstVecOperandU32,
36247 ConstScalarOperandU128, ConstScalarOperandU32>(gpuDynInst,
36248 addr1, addr0, rsrcDesc, offset, inst_offset);
36249 }
36250
36251 if (isLocalMem()) {
36252 gpuDynInst->computeUnit()->localMemoryPipe
36253 .issueRequest(gpuDynInst);
36254 wf->wrLmReqsInPipe--;
36255 wf->outstandingReqsWrLm++;
36256 } else {
36257 gpuDynInst->computeUnit()->globalMemoryPipe
36258 .issueRequest(gpuDynInst);
36259 wf->wrGmReqsInPipe--;
36260 wf->outstandingReqsWrGm++;
36261 }
36262
36263 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
36264 if (gpuDynInst->exec_mask[lane]) {
36265 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane * 4]
36266 = data0[lane];
36267 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 1]
36268 = data1[lane];
36269 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 2]
36270 = data2[lane];
36271 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane*4 + 3]
36272 = data3[lane];
36273 }
36274 }
36275
36276 wf->outstandingReqs++;
36277 wf->validateRequestCounters();
36278 } // execute
36279
36280 void
36281 Inst_MUBUF__BUFFER_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
36282 {
36283 initMemWrite<4>(gpuDynInst);
36284 } // initiateAcc
36285
36286 void
36287 Inst_MUBUF__BUFFER_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
36288 {
36289 } // completeAcc
36290
36291 Inst_MUBUF__BUFFER_STORE_LDS_DWORD
36292 ::Inst_MUBUF__BUFFER_STORE_LDS_DWORD(InFmt_MUBUF *iFmt)
36293 : Inst_MUBUF(iFmt, "buffer_store_lds_dword")
36294 {
36295 setFlag(GlobalSegment);
36296 } // Inst_MUBUF__BUFFER_STORE_LDS_DWORD
36297
36298 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::~Inst_MUBUF__BUFFER_STORE_LDS_DWORD()
36299 {
36300 } // ~Inst_MUBUF__BUFFER_STORE_LDS_DWORD
36301
36302 // Store one DWORD from LDS memory to system memory without utilizing
36303 // VGPRs.
36304 void
36305 Inst_MUBUF__BUFFER_STORE_LDS_DWORD::execute(GPUDynInstPtr gpuDynInst)
36306 {
36307 panicUnimplemented();
36308 }
36309
36310 Inst_MUBUF__BUFFER_WBINVL1::Inst_MUBUF__BUFFER_WBINVL1(InFmt_MUBUF *iFmt)
36311 : Inst_MUBUF(iFmt, "buffer_wbinvl1")
36312 {
36313 setFlag(MemoryRef);
        setFlag(MemSync);
        setFlag(GlobalSegment);
36317 } // Inst_MUBUF__BUFFER_WBINVL1
36318
36319 Inst_MUBUF__BUFFER_WBINVL1::~Inst_MUBUF__BUFFER_WBINVL1()
36320 {
36321 } // ~Inst_MUBUF__BUFFER_WBINVL1
36322
36323 // Write back and invalidate the shader L1.
36324 // Always returns ACK to shader.
36325 void
36326 Inst_MUBUF__BUFFER_WBINVL1::execute(GPUDynInstPtr gpuDynInst)
36327 {
36328 Wavefront *wf = gpuDynInst->wavefront();
36329 gpuDynInst->execUnitId = wf->execUnitId;
36330 gpuDynInst->exec_mask = wf->execMask();
36331 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36332 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36333
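        // The writeback/invalidate request is tracked as both a read and a
        // write in the global memory pipeline, so both sets of request
        // counters are adjusted here.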
36334 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
36335 gpuDynInst->computeUnit()->globalMemoryPipe.
36336 issueRequest(gpuDynInst);
36337 wf->wrGmReqsInPipe--;
36338 wf->rdGmReqsInPipe--;
36339
36340 wf->outstandingReqsWrGm++;
36341 wf->outstandingReqsRdGm++;
36342 } else {
            fatal("Non-global buffer instructions not implemented yet.\n");
36344 }
36345
36346 wf->outstandingReqs++;
36347 wf->validateRequestCounters();
36348 }
36349
36350 void
36351 Inst_MUBUF__BUFFER_WBINVL1::initiateAcc(GPUDynInstPtr gpuDynInst)
36352 {
36353 injectGlobalMemFence(gpuDynInst);
36354 } // initiateAcc
36355
36356 void
36357 Inst_MUBUF__BUFFER_WBINVL1::completeAcc(GPUDynInstPtr gpuDynInst)
36358 {
36359 } // completeAcc
36360
    Inst_MUBUF__BUFFER_WBINVL1_VOL
        ::Inst_MUBUF__BUFFER_WBINVL1_VOL(InFmt_MUBUF *iFmt)
        : Inst_MUBUF(iFmt, "buffer_wbinvl1_vol")
    {
        /**
         * This instruction is the same as buffer_wbinvl1 except that it
         * only invalidates L1 shader lines whose MTYPE indicates system
         * or group coherence. Since the L1 does not differentiate between
         * its cache lines, this instruction currently behaves (and is
         * implemented) exactly like buffer_wbinvl1.
         */
        setFlag(MemoryRef);
        setFlag(MemSync);
        setFlag(GlobalSegment);
    } // Inst_MUBUF__BUFFER_WBINVL1_VOL
36376
36377 Inst_MUBUF__BUFFER_WBINVL1_VOL::~Inst_MUBUF__BUFFER_WBINVL1_VOL()
36378 {
36379 } // ~Inst_MUBUF__BUFFER_WBINVL1_VOL
36380
36381 // Write back and invalidate the shader L1 only for lines that are marked
36382 // volatile. Always returns ACK to shader.
36383 void
36384 Inst_MUBUF__BUFFER_WBINVL1_VOL::execute(GPUDynInstPtr gpuDynInst)
36385 {
36386 Wavefront *wf = gpuDynInst->wavefront();
36387 gpuDynInst->execUnitId = wf->execUnitId;
36388 gpuDynInst->exec_mask = wf->execMask();
36389 gpuDynInst->latency.init(gpuDynInst->computeUnit());
36390 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
36391
36392 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
36393 gpuDynInst->computeUnit()->globalMemoryPipe.
36394 issueRequest(gpuDynInst);
36395 wf->wrGmReqsInPipe--;
36396 wf->rdGmReqsInPipe--;
36397
36398 wf->outstandingReqsWrGm++;
36399 wf->outstandingReqsRdGm++;
36400 } else {
            fatal("Non-global buffer instructions not implemented yet.\n");
36402 }
36403
36404 wf->outstandingReqs++;
36405 wf->validateRequestCounters();
    }

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::initiateAcc(GPUDynInstPtr gpuDynInst)
    {
        injectGlobalMemFence(gpuDynInst);
    } // initiateAcc

    void
    Inst_MUBUF__BUFFER_WBINVL1_VOL::completeAcc(GPUDynInstPtr gpuDynInst)
    {
    } // completeAcc
36416
36417 Inst_MUBUF__BUFFER_ATOMIC_SWAP
36418 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP(InFmt_MUBUF *iFmt)
36419 : Inst_MUBUF(iFmt, "buffer_atomic_swap")
36420 {
36421 setFlag(AtomicExch);
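        // With GLC set the atomic returns the pre-operation value of the
        // memory location to the destination VGPR; otherwise nothing is
        // returned. The same GLC handling applies to all buffer atomics
        // below.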
36422 if (instData.GLC) {
36423 setFlag(AtomicReturn);
36424 } else {
36425 setFlag(AtomicNoReturn);
        }
36427 setFlag(MemoryRef);
36428 setFlag(GlobalSegment);
36429 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP
36430
36431 Inst_MUBUF__BUFFER_ATOMIC_SWAP::~Inst_MUBUF__BUFFER_ATOMIC_SWAP()
36432 {
36433 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP
36434
36435 // tmp = MEM[ADDR];
36436 // MEM[ADDR] = DATA;
36437 // RETURN_DATA = tmp.
36438 void
36439 Inst_MUBUF__BUFFER_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
36440 {
36441 panicUnimplemented();
36442 }
36443
36444 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
36445 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP(InFmt_MUBUF *iFmt)
36446 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap")
36447 {
36448 setFlag(AtomicCAS);
36449 if (instData.GLC) {
36450 setFlag(AtomicReturn);
36451 } else {
36452 setFlag(AtomicNoReturn);
36453 }
36454 setFlag(MemoryRef);
36455 setFlag(GlobalSegment);
36456 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
36457
36458 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP()
36459 {
36460 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP
36461
36462 // tmp = MEM[ADDR];
36463 // src = DATA[0];
36464 // cmp = DATA[1];
36465 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
36466 // RETURN_DATA[0] = tmp.
36467 void
36468 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
36469 {
36470 panicUnimplemented();
36471 }
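
    // A minimal illustrative sketch (not simulator code; the helper name is
    // made up) of the semantics above. Note that DATA[0] supplies the new
    // value and DATA[1] the comparand, the reverse of the argument order
    // most CAS interfaces use:
    //
    //     VecElemU32 cmpSwap(VecElemU32 &mem, VecElemU32 src, VecElemU32 cmp)
    //     {
    //         VecElemU32 tmp = mem;
    //         mem = (tmp == cmp) ? src : tmp;
    //         return tmp;   // pre-operation value
    //     }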
36472
36473 Inst_MUBUF__BUFFER_ATOMIC_ADD
36474 ::Inst_MUBUF__BUFFER_ATOMIC_ADD(InFmt_MUBUF *iFmt)
36475 : Inst_MUBUF(iFmt, "buffer_atomic_add")
36476 {
36477 setFlag(AtomicAdd);
36478 if (instData.GLC) {
36479 setFlag(AtomicReturn);
36480 } else {
36481 setFlag(AtomicNoReturn);
        }
36483 setFlag(MemoryRef);
36484 setFlag(GlobalSegment);
36485 } // Inst_MUBUF__BUFFER_ATOMIC_ADD
36486
36487 Inst_MUBUF__BUFFER_ATOMIC_ADD::~Inst_MUBUF__BUFFER_ATOMIC_ADD()
36488 {
36489 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD
36490
36491 // tmp = MEM[ADDR];
36492 // MEM[ADDR] += DATA;
36493 // RETURN_DATA = tmp.
36494 void
36495 Inst_MUBUF__BUFFER_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
36496 {
36497 panicUnimplemented();
36498 }
36499
36500 Inst_MUBUF__BUFFER_ATOMIC_SUB
36501 ::Inst_MUBUF__BUFFER_ATOMIC_SUB(InFmt_MUBUF *iFmt)
36502 : Inst_MUBUF(iFmt, "buffer_atomic_sub")
36503 {
36504 setFlag(AtomicSub);
36505 if (instData.GLC) {
36506 setFlag(AtomicReturn);
36507 } else {
36508 setFlag(AtomicNoReturn);
36509 }
36510 setFlag(MemoryRef);
36511 setFlag(GlobalSegment);
36512 } // Inst_MUBUF__BUFFER_ATOMIC_SUB
36513
36514 Inst_MUBUF__BUFFER_ATOMIC_SUB::~Inst_MUBUF__BUFFER_ATOMIC_SUB()
36515 {
36516 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB
36517
36518 // tmp = MEM[ADDR];
36519 // MEM[ADDR] -= DATA;
36520 // RETURN_DATA = tmp.
36521 void
36522 Inst_MUBUF__BUFFER_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
36523 {
36524 panicUnimplemented();
36525 }
36526
36527 Inst_MUBUF__BUFFER_ATOMIC_SMIN
36528 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN(InFmt_MUBUF *iFmt)
36529 : Inst_MUBUF(iFmt, "buffer_atomic_smin")
36530 {
36531 setFlag(AtomicMin);
36532 if (instData.GLC) {
36533 setFlag(AtomicReturn);
36534 } else {
36535 setFlag(AtomicNoReturn);
36536 }
36537 setFlag(MemoryRef);
36538 setFlag(GlobalSegment);
36539 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN
36540
36541 Inst_MUBUF__BUFFER_ATOMIC_SMIN::~Inst_MUBUF__BUFFER_ATOMIC_SMIN()
36542 {
36543 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN
36544
36545 // tmp = MEM[ADDR];
36546 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
36547 // RETURN_DATA = tmp.
36548 void
36549 Inst_MUBUF__BUFFER_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
36550 {
36551 panicUnimplemented();
36552 }
36553
36554 Inst_MUBUF__BUFFER_ATOMIC_UMIN
36555 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN(InFmt_MUBUF *iFmt)
36556 : Inst_MUBUF(iFmt, "buffer_atomic_umin")
36557 {
36558 setFlag(AtomicMin);
36559 if (instData.GLC) {
36560 setFlag(AtomicReturn);
36561 } else {
36562 setFlag(AtomicNoReturn);
36563 }
36564 setFlag(MemoryRef);
36565 setFlag(GlobalSegment);
36566 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN
36567
36568 Inst_MUBUF__BUFFER_ATOMIC_UMIN::~Inst_MUBUF__BUFFER_ATOMIC_UMIN()
36569 {
36570 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN
36571
36572 // tmp = MEM[ADDR];
36573 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
36574 // RETURN_DATA = tmp.
36575 void
36576 Inst_MUBUF__BUFFER_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
36577 {
36578 panicUnimplemented();
36579 }
36580
36581 Inst_MUBUF__BUFFER_ATOMIC_SMAX
36582 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX(InFmt_MUBUF *iFmt)
36583 : Inst_MUBUF(iFmt, "buffer_atomic_smax")
36584 {
36585 setFlag(AtomicMax);
36586 if (instData.GLC) {
36587 setFlag(AtomicReturn);
36588 } else {
36589 setFlag(AtomicNoReturn);
36590 }
36591 setFlag(MemoryRef);
36592 setFlag(GlobalSegment);
36593 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX
36594
36595 Inst_MUBUF__BUFFER_ATOMIC_SMAX::~Inst_MUBUF__BUFFER_ATOMIC_SMAX()
36596 {
36597 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX
36598
36599 // tmp = MEM[ADDR];
36600 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
36601 // RETURN_DATA = tmp.
36602 void
36603 Inst_MUBUF__BUFFER_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
36604 {
36605 panicUnimplemented();
36606 }
36607
36608 Inst_MUBUF__BUFFER_ATOMIC_UMAX
36609 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX(InFmt_MUBUF *iFmt)
36610 : Inst_MUBUF(iFmt, "buffer_atomic_umax")
36611 {
36612 setFlag(AtomicMax);
36613 if (instData.GLC) {
36614 setFlag(AtomicReturn);
36615 } else {
36616 setFlag(AtomicNoReturn);
        }
36618 setFlag(MemoryRef);
36619 setFlag(GlobalSegment);
36620 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX
36621
36622 Inst_MUBUF__BUFFER_ATOMIC_UMAX::~Inst_MUBUF__BUFFER_ATOMIC_UMAX()
36623 {
36624 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX
36625
36626 // tmp = MEM[ADDR];
36627 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
36628 // RETURN_DATA = tmp.
36629 void
36630 Inst_MUBUF__BUFFER_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
36631 {
36632 panicUnimplemented();
36633 }
36634
36635 Inst_MUBUF__BUFFER_ATOMIC_AND
36636 ::Inst_MUBUF__BUFFER_ATOMIC_AND(InFmt_MUBUF *iFmt)
36637 : Inst_MUBUF(iFmt, "buffer_atomic_and")
36638 {
36639 setFlag(AtomicAnd);
36640 if (instData.GLC) {
36641 setFlag(AtomicReturn);
36642 } else {
36643 setFlag(AtomicNoReturn);
36644 }
36645 setFlag(MemoryRef);
36646 setFlag(GlobalSegment);
36647 } // Inst_MUBUF__BUFFER_ATOMIC_AND
36648
36649 Inst_MUBUF__BUFFER_ATOMIC_AND::~Inst_MUBUF__BUFFER_ATOMIC_AND()
36650 {
36651 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND
36652
36653 // tmp = MEM[ADDR];
36654 // MEM[ADDR] &= DATA;
36655 // RETURN_DATA = tmp.
36656 void
36657 Inst_MUBUF__BUFFER_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
36658 {
36659 panicUnimplemented();
36660 }
36661
36662 Inst_MUBUF__BUFFER_ATOMIC_OR
36663 ::Inst_MUBUF__BUFFER_ATOMIC_OR(InFmt_MUBUF *iFmt)
36664 : Inst_MUBUF(iFmt, "buffer_atomic_or")
36665 {
36666 setFlag(AtomicOr);
36667 if (instData.GLC) {
36668 setFlag(AtomicReturn);
36669 } else {
36670 setFlag(AtomicNoReturn);
36671 }
36672 setFlag(MemoryRef);
36673 setFlag(GlobalSegment);
36674 } // Inst_MUBUF__BUFFER_ATOMIC_OR
36675
36676 Inst_MUBUF__BUFFER_ATOMIC_OR::~Inst_MUBUF__BUFFER_ATOMIC_OR()
36677 {
36678 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR
36679
36680 // tmp = MEM[ADDR];
36681 // MEM[ADDR] |= DATA;
36682 // RETURN_DATA = tmp.
36683 void
36684 Inst_MUBUF__BUFFER_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
36685 {
36686 panicUnimplemented();
36687 }
36688
36689 Inst_MUBUF__BUFFER_ATOMIC_XOR
36690 ::Inst_MUBUF__BUFFER_ATOMIC_XOR(InFmt_MUBUF *iFmt)
36691 : Inst_MUBUF(iFmt, "buffer_atomic_xor")
36692 {
36693 setFlag(AtomicXor);
36694 if (instData.GLC) {
36695 setFlag(AtomicReturn);
36696 } else {
36697 setFlag(AtomicNoReturn);
36698 }
36699 setFlag(MemoryRef);
36700 setFlag(GlobalSegment);
36701 } // Inst_MUBUF__BUFFER_ATOMIC_XOR
36702
36703 Inst_MUBUF__BUFFER_ATOMIC_XOR::~Inst_MUBUF__BUFFER_ATOMIC_XOR()
36704 {
36705 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR
36706
36707 // tmp = MEM[ADDR];
36708 // MEM[ADDR] ^= DATA;
36709 // RETURN_DATA = tmp.
36710 void
36711 Inst_MUBUF__BUFFER_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
36712 {
36713 panicUnimplemented();
36714 }
36715
36716 Inst_MUBUF__BUFFER_ATOMIC_INC
36717 ::Inst_MUBUF__BUFFER_ATOMIC_INC(InFmt_MUBUF *iFmt)
36718 : Inst_MUBUF(iFmt, "buffer_atomic_inc")
36719 {
36720 setFlag(AtomicInc);
36721 if (instData.GLC) {
36722 setFlag(AtomicReturn);
36723 } else {
36724 setFlag(AtomicNoReturn);
36725 }
36726 setFlag(MemoryRef);
36727 setFlag(GlobalSegment);
36728 } // Inst_MUBUF__BUFFER_ATOMIC_INC
36729
36730 Inst_MUBUF__BUFFER_ATOMIC_INC::~Inst_MUBUF__BUFFER_ATOMIC_INC()
36731 {
36732 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC
36733
36734 // tmp = MEM[ADDR];
36735 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
36736 // RETURN_DATA = tmp.
36737 void
36738 Inst_MUBUF__BUFFER_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
36739 {
36740 panicUnimplemented();
36741 }
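
    // A minimal illustrative sketch (not simulator code; the helper name is
    // made up): the increment wraps to zero once the current value reaches
    // DATA, rather than at the numeric maximum:
    //
    //     VecElemU32 wrapInc(VecElemU32 &mem, VecElemU32 data)
    //     {
    //         VecElemU32 tmp = mem;
    //         mem = (tmp >= data) ? 0 : tmp + 1;
    //         return tmp;   // pre-operation value
    //     }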
36742
36743 Inst_MUBUF__BUFFER_ATOMIC_DEC
36744 ::Inst_MUBUF__BUFFER_ATOMIC_DEC(InFmt_MUBUF *iFmt)
36745 : Inst_MUBUF(iFmt, "buffer_atomic_dec")
36746 {
36747 setFlag(AtomicDec);
36748 if (instData.GLC) {
36749 setFlag(AtomicReturn);
36750 } else {
36751 setFlag(AtomicNoReturn);
36752 }
36753 setFlag(MemoryRef);
36754 setFlag(GlobalSegment);
36755 } // Inst_MUBUF__BUFFER_ATOMIC_DEC
36756
36757 Inst_MUBUF__BUFFER_ATOMIC_DEC::~Inst_MUBUF__BUFFER_ATOMIC_DEC()
36758 {
36759 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC
36760
36761 // tmp = MEM[ADDR];
36762 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
    // (unsigned compare);
    // RETURN_DATA = tmp.
36764 void
36765 Inst_MUBUF__BUFFER_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
36766 {
36767 panicUnimplemented();
36768 }
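
    // A minimal illustrative sketch (not simulator code; the helper name is
    // made up): the decrement wraps back to DATA when the current value is
    // zero or already greater than DATA:
    //
    //     VecElemU32 wrapDec(VecElemU32 &mem, VecElemU32 data)
    //     {
    //         VecElemU32 tmp = mem;
    //         mem = (tmp == 0 || tmp > data) ? data : tmp - 1;
    //         return tmp;   // pre-operation value
    //     }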
36769
36770 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
36771 ::Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2(InFmt_MUBUF *iFmt)
36772 : Inst_MUBUF(iFmt, "buffer_atomic_swap_x2")
36773 {
36774 setFlag(AtomicExch);
36775 if (instData.GLC) {
36776 setFlag(AtomicReturn);
36777 } else {
36778 setFlag(AtomicNoReturn);
36779 }
36780 setFlag(MemoryRef);
36781 setFlag(GlobalSegment);
36782 } // Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
36783
36784 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2()
36785 {
36786 } // ~Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2
36787
36788 // tmp = MEM[ADDR];
36789 // MEM[ADDR] = DATA[0:1];
36790 // RETURN_DATA[0:1] = tmp.
36791 void
36792 Inst_MUBUF__BUFFER_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
36793 {
36794 panicUnimplemented();
36795 }
36796
36797 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36798 ::Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2(InFmt_MUBUF *iFmt)
36799 : Inst_MUBUF(iFmt, "buffer_atomic_cmpswap_x2")
36800 {
36801 setFlag(AtomicCAS);
36802 if (instData.GLC) {
36803 setFlag(AtomicReturn);
36804 } else {
36805 setFlag(AtomicNoReturn);
36806 }
36807 setFlag(MemoryRef);
36808 setFlag(GlobalSegment);
36809 } // Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36810
36811 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36812 ::~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2()
36813 {
36814 } // ~Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2
36815
36816 // tmp = MEM[ADDR];
36817 // src = DATA[0:1];
36818 // cmp = DATA[2:3];
36819 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
36820 // RETURN_DATA[0:1] = tmp.
36821 void
36822 Inst_MUBUF__BUFFER_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
36823 {
36824 panicUnimplemented();
36825 }
36826
36827 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
36828 ::Inst_MUBUF__BUFFER_ATOMIC_ADD_X2(InFmt_MUBUF *iFmt)
36829 : Inst_MUBUF(iFmt, "buffer_atomic_add_x2")
36830 {
36831 setFlag(AtomicAdd);
36832 if (instData.GLC) {
36833 setFlag(AtomicReturn);
36834 } else {
36835 setFlag(AtomicNoReturn);
36836 }
36837 setFlag(MemoryRef);
36838 setFlag(GlobalSegment);
36839 } // Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
36840
36841 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2()
36842 {
36843 } // ~Inst_MUBUF__BUFFER_ATOMIC_ADD_X2
36844
36845 // tmp = MEM[ADDR];
36846 // MEM[ADDR] += DATA[0:1];
36847 // RETURN_DATA[0:1] = tmp.
36848 void
36849 Inst_MUBUF__BUFFER_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
36850 {
36851 panicUnimplemented();
36852 }
36853
36854 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
36855 ::Inst_MUBUF__BUFFER_ATOMIC_SUB_X2(InFmt_MUBUF *iFmt)
36856 : Inst_MUBUF(iFmt, "buffer_atomic_sub_x2")
36857 {
36858 setFlag(AtomicSub);
36859 if (instData.GLC) {
36860 setFlag(AtomicReturn);
36861 } else {
36862 setFlag(AtomicNoReturn);
36863 }
36864 setFlag(MemoryRef);
36865 setFlag(GlobalSegment);
36866 } // Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
36867
36868 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2()
36869 {
36870 } // ~Inst_MUBUF__BUFFER_ATOMIC_SUB_X2
36871
36872 // tmp = MEM[ADDR];
36873 // MEM[ADDR] -= DATA[0:1];
36874 // RETURN_DATA[0:1] = tmp.
36875 void
36876 Inst_MUBUF__BUFFER_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
36877 {
36878 panicUnimplemented();
36879 }
36880
36881 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
36882 ::Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2(InFmt_MUBUF *iFmt)
36883 : Inst_MUBUF(iFmt, "buffer_atomic_smin_x2")
36884 {
36885 setFlag(AtomicMin);
36886 if (instData.GLC) {
36887 setFlag(AtomicReturn);
36888 } else {
36889 setFlag(AtomicNoReturn);
36890 }
36891 setFlag(MemoryRef);
36892 setFlag(GlobalSegment);
36893 } // Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
36894
36895 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2()
36896 {
36897 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2
36898
36899 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
36901 // RETURN_DATA[0:1] = tmp.
36902 void
36903 Inst_MUBUF__BUFFER_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
36904 {
36905 panicUnimplemented();
36906 }
36907
36908 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
36909 ::Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2(InFmt_MUBUF *iFmt)
36910 : Inst_MUBUF(iFmt, "buffer_atomic_umin_x2")
36911 {
36912 setFlag(AtomicMin);
36913 if (instData.GLC) {
36914 setFlag(AtomicReturn);
36915 } else {
36916 setFlag(AtomicNoReturn);
36917 }
36918 setFlag(MemoryRef);
36919 setFlag(GlobalSegment);
36920 } // Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
36921
36922 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2()
36923 {
36924 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2
36925
36926 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
36928 // RETURN_DATA[0:1] = tmp.
36929 void
36930 Inst_MUBUF__BUFFER_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
36931 {
36932 panicUnimplemented();
36933 }
36934
36935 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
36936 ::Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2(InFmt_MUBUF *iFmt)
36937 : Inst_MUBUF(iFmt, "buffer_atomic_smax_x2")
36938 {
36939 setFlag(AtomicMax);
36940 if (instData.GLC) {
36941 setFlag(AtomicReturn);
36942 } else {
36943 setFlag(AtomicNoReturn);
36944 }
36945 setFlag(MemoryRef);
36946 setFlag(GlobalSegment);
36947 } // Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
36948
36949 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2()
36950 {
36951 } // ~Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2
36952
36953 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
36955 // RETURN_DATA[0:1] = tmp.
36956 void
36957 Inst_MUBUF__BUFFER_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
36958 {
36959 panicUnimplemented();
36960 }
36961
36962 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
36963 ::Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2(InFmt_MUBUF *iFmt)
36964 : Inst_MUBUF(iFmt, "buffer_atomic_umax_x2")
36965 {
36966 setFlag(AtomicMax);
36967 if (instData.GLC) {
36968 setFlag(AtomicReturn);
36969 } else {
36970 setFlag(AtomicNoReturn);
36971 }
36972 setFlag(MemoryRef);
36973 setFlag(GlobalSegment);
36974 } // Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
36975
36976 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2()
36977 {
36978 } // ~Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2
36979
36980 // tmp = MEM[ADDR];
    // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
36982 // RETURN_DATA[0:1] = tmp.
36983 void
36984 Inst_MUBUF__BUFFER_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
36985 {
36986 panicUnimplemented();
36987 }
36988
36989 Inst_MUBUF__BUFFER_ATOMIC_AND_X2
36990 ::Inst_MUBUF__BUFFER_ATOMIC_AND_X2(InFmt_MUBUF *iFmt)
36991 : Inst_MUBUF(iFmt, "buffer_atomic_and_x2")
36992 {
36993 setFlag(AtomicAnd);
36994 if (instData.GLC) {
36995 setFlag(AtomicReturn);
36996 } else {
36997 setFlag(AtomicNoReturn);
36998 }
36999 setFlag(MemoryRef);
37000 setFlag(GlobalSegment);
37001 } // Inst_MUBUF__BUFFER_ATOMIC_AND_X2
37002
37003 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::~Inst_MUBUF__BUFFER_ATOMIC_AND_X2()
37004 {
37005 } // ~Inst_MUBUF__BUFFER_ATOMIC_AND_X2
37006
37007 // tmp = MEM[ADDR];
37008 // MEM[ADDR] &= DATA[0:1];
37009 // RETURN_DATA[0:1] = tmp.
37010 void
37011 Inst_MUBUF__BUFFER_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
37012 {
37013 panicUnimplemented();
37014 }
37015
37016 Inst_MUBUF__BUFFER_ATOMIC_OR_X2
37017 ::Inst_MUBUF__BUFFER_ATOMIC_OR_X2(InFmt_MUBUF *iFmt)
37018 : Inst_MUBUF(iFmt, "buffer_atomic_or_x2")
37019 {
37020 setFlag(AtomicOr);
37021 if (instData.GLC) {
37022 setFlag(AtomicReturn);
37023 } else {
37024 setFlag(AtomicNoReturn);
37025 }
37026 setFlag(MemoryRef);
37027 setFlag(GlobalSegment);
37028 } // Inst_MUBUF__BUFFER_ATOMIC_OR_X2
37029
37030 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::~Inst_MUBUF__BUFFER_ATOMIC_OR_X2()
37031 {
37032 } // ~Inst_MUBUF__BUFFER_ATOMIC_OR_X2
37033
37034 // tmp = MEM[ADDR];
37035 // MEM[ADDR] |= DATA[0:1];
37036 // RETURN_DATA[0:1] = tmp.
37037 void
37038 Inst_MUBUF__BUFFER_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
37039 {
37040 panicUnimplemented();
37041 }
37042
37043 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
37044 ::Inst_MUBUF__BUFFER_ATOMIC_XOR_X2(InFmt_MUBUF *iFmt)
37045 : Inst_MUBUF(iFmt, "buffer_atomic_xor_x2")
37046 {
37047 setFlag(AtomicXor);
37048 if (instData.GLC) {
37049 setFlag(AtomicReturn);
37050 } else {
37051 setFlag(AtomicNoReturn);
37052 }
37053 setFlag(MemoryRef);
37054 setFlag(GlobalSegment);
37055 } // Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
37056
37057 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2()
37058 {
37059 } // ~Inst_MUBUF__BUFFER_ATOMIC_XOR_X2
37060
37061 // tmp = MEM[ADDR];
37062 // MEM[ADDR] ^= DATA[0:1];
37063 // RETURN_DATA[0:1] = tmp.
37064 void
37065 Inst_MUBUF__BUFFER_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
37066 {
37067 panicUnimplemented();
37068 }
37069
37070 Inst_MUBUF__BUFFER_ATOMIC_INC_X2
37071 ::Inst_MUBUF__BUFFER_ATOMIC_INC_X2(InFmt_MUBUF *iFmt)
37072 : Inst_MUBUF(iFmt, "buffer_atomic_inc_x2")
37073 {
37074 setFlag(AtomicInc);
37075 if (instData.GLC) {
37076 setFlag(AtomicReturn);
37077 } else {
37078 setFlag(AtomicNoReturn);
37079 }
37080 setFlag(MemoryRef);
37081 setFlag(GlobalSegment);
37082 } // Inst_MUBUF__BUFFER_ATOMIC_INC_X2
37083
37084 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::~Inst_MUBUF__BUFFER_ATOMIC_INC_X2()
37085 {
37086 } // ~Inst_MUBUF__BUFFER_ATOMIC_INC_X2
37087
37088 // tmp = MEM[ADDR];
37089 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
37090 // RETURN_DATA[0:1] = tmp.
37091 void
37092 Inst_MUBUF__BUFFER_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
37093 {
37094 panicUnimplemented();
37095 }
37096
37097 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
37098 ::Inst_MUBUF__BUFFER_ATOMIC_DEC_X2(InFmt_MUBUF *iFmt)
37099 : Inst_MUBUF(iFmt, "buffer_atomic_dec_x2")
37100 {
37101 setFlag(AtomicDec);
37102 if (instData.GLC) {
37103 setFlag(AtomicReturn);
37104 } else {
37105 setFlag(AtomicNoReturn);
37106 }
37107 setFlag(MemoryRef);
37108 setFlag(GlobalSegment);
37109 } // Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
37110
37111 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2()
37112 {
37113 } // ~Inst_MUBUF__BUFFER_ATOMIC_DEC_X2
37114
37115 // tmp = MEM[ADDR];
37116 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
37117 // (unsigned compare);
37118 // RETURN_DATA[0:1] = tmp.
37119 void
37120 Inst_MUBUF__BUFFER_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
37121 {
37122 panicUnimplemented();
37123 }
37124
37125 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
37126 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_X(InFmt_MTBUF *iFmt)
37127 : Inst_MTBUF(iFmt, "tbuffer_load_format_x")
37128 {
37129 setFlag(MemoryRef);
37130 setFlag(Load);
37131 setFlag(GlobalSegment);
37132 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
37133
37134 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X()
37135 {
37136 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_X
37137
37138 // Typed buffer load 1 dword with format conversion.
37139 void
37140 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
37141 {
37142 panicUnimplemented();
37143 }
37144
37145 void
37146 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
37147 {
37148 } // initiateAcc
37149
37150 void
37151 Inst_MTBUF__TBUFFER_LOAD_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
37152 {
37153 }
37154
37155 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
37156 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY(InFmt_MTBUF *iFmt)
37157 : Inst_MTBUF(iFmt, "tbuffer_load_format_xy")
37158 {
37159 setFlag(MemoryRef);
37160 setFlag(Load);
37161 setFlag(GlobalSegment);
37162 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
37163
37164 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY()
37165 {
37166 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY
37167
37168 // Typed buffer load 2 dwords with format conversion.
37169 void
37170 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
37171 {
37172 panicUnimplemented();
37173 }
37174
37175 void
37176 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
37177 {
37178 } // initiateAcc
37179
37180 void
37181 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
37182 {
37183 }
37184
37185 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
37186 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ(InFmt_MTBUF *iFmt)
37187 : Inst_MTBUF(iFmt, "tbuffer_load_format_xyz")
37188 {
37189 setFlag(MemoryRef);
37190 setFlag(Load);
37191 setFlag(GlobalSegment);
37192 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
37193
37194 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ()
37195 {
37196 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ
37197
37198 // Typed buffer load 3 dwords with format conversion.
37199 void
37200 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
37201 {
37202 panicUnimplemented();
37203 }
37204
37205 void
37206 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
37207 {
37208 } // initiateAcc
37209
37210 void
37211 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
37212 {
37213 }
37214
37215 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37216 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW(InFmt_MTBUF *iFmt)
37217 : Inst_MTBUF(iFmt, "tbuffer_load_format_xyzw")
37218 {
37219 setFlag(MemoryRef);
37220 setFlag(Load);
37221 setFlag(GlobalSegment);
37222 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37223
37224 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37225 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW()
37226 {
37227 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW
37228
37229 // Typed buffer load 4 dwords with format conversion.
37230 void
37231 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
37232 {
37233 panicUnimplemented();
37234 }
37235
37236 void
37237 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::initiateAcc(GPUDynInstPtr gpuDynInst)
37238 {
37239 } // initiateAcc
37240
37241 void
37242 Inst_MTBUF__TBUFFER_LOAD_FORMAT_XYZW::completeAcc(GPUDynInstPtr gpuDynInst)
37243 {
37244 }
37245
37246 Inst_MTBUF__TBUFFER_STORE_FORMAT_X
37247 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_X(InFmt_MTBUF *iFmt)
37248 : Inst_MTBUF(iFmt, "tbuffer_store_format_x")
37249 {
37250 setFlag(MemoryRef);
37251 setFlag(Store);
37252 setFlag(GlobalSegment);
37253 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_X
37254
37255 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::~Inst_MTBUF__TBUFFER_STORE_FORMAT_X()
37256 {
37257 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_X
37258
37259 // Typed buffer store 1 dword with format conversion.
37260 void
37261 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::execute(GPUDynInstPtr gpuDynInst)
37262 {
37263 panicUnimplemented();
37264 }
37265
37266 void
37267 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::initiateAcc(GPUDynInstPtr gpuDynInst)
37268 {
37269 } // initiateAcc
37270
37271 void
37272 Inst_MTBUF__TBUFFER_STORE_FORMAT_X::completeAcc(GPUDynInstPtr gpuDynInst)
37273 {
37274 }
37275
37276 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
37277 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XY(InFmt_MTBUF *iFmt)
37278 : Inst_MTBUF(iFmt, "tbuffer_store_format_xy")
37279 {
37280 setFlag(MemoryRef);
37281 setFlag(Store);
37282 setFlag(GlobalSegment);
37283 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
37284
37285 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY()
37286 {
37287 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XY
37288
37289 // Typed buffer store 2 dwords with format conversion.
37290 void
37291 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::execute(GPUDynInstPtr gpuDynInst)
37292 {
37293 panicUnimplemented();
37294 }
37295
37296 void
37297 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::initiateAcc(GPUDynInstPtr gpuDynInst)
37298 {
37299 } // initiateAcc
37300
37301 void
37302 Inst_MTBUF__TBUFFER_STORE_FORMAT_XY::completeAcc(GPUDynInstPtr gpuDynInst)
37303 {
37304 }
37305
37306 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37307 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ(InFmt_MTBUF *iFmt)
37308 : Inst_MTBUF(iFmt, "tbuffer_store_format_xyz")
37309 {
37310 setFlag(MemoryRef);
37311 setFlag(Store);
37312 setFlag(GlobalSegment);
37313 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37314
37315 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37316 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ()
37317 {
37318 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ
37319
37320 // Typed buffer store 3 dwords with format conversion.
37321 void
37322 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::execute(GPUDynInstPtr gpuDynInst)
37323 {
37324 panicUnimplemented();
37325 }
37326
37327 void
37328 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::initiateAcc(GPUDynInstPtr gpuDynInst)
37329 {
37330 } // initiateAcc
37331
37332 void
37333 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZ::completeAcc(GPUDynInstPtr gpuDynInst)
37334 {
37335 }
37336
37337 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37338 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW(InFmt_MTBUF *iFmt)
37339 : Inst_MTBUF(iFmt, "tbuffer_store_format_xyzw")
37340 {
37341 setFlag(MemoryRef);
37342 setFlag(Store);
37343 setFlag(GlobalSegment);
37344 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37345
37346 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37347 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW()
37348 {
37349 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW
37350
37351 // Typed buffer store 4 dwords with format conversion.
37352 void
37353 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::execute(GPUDynInstPtr gpuDynInst)
37354 {
37355 panicUnimplemented();
37356 }
37357
37358 void
37359 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::initiateAcc(
37360 GPUDynInstPtr gpuDynInst)
37361 {
37362 } // initiateAcc
37363
37364 void
37365 Inst_MTBUF__TBUFFER_STORE_FORMAT_XYZW::completeAcc(
37366 GPUDynInstPtr gpuDynInst)
37367 {
37368 }
37369
37370 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
37371 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X(InFmt_MTBUF *iFmt)
37372 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_x")
37373 {
37374 setFlag(MemoryRef);
37375 setFlag(Load);
37376 setFlag(GlobalSegment);
37377 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
37378
37379 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::
37380 ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X()
37381 {
37382 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X
37383
37384 // Typed buffer load 1 dword with format conversion.
37385 void
37386 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
37387 {
37388 panicUnimplemented();
37389 }
37390
37391 void
37392 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::initiateAcc(
37393 GPUDynInstPtr gpuDynInst)
37394 {
37395 } // initiateAcc
37396
37397 void
37398 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_X::completeAcc(
37399 GPUDynInstPtr gpuDynInst)
37400 {
37401 }
37402
37403 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37404 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
37405 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xy")
37406 {
37407 setFlag(MemoryRef);
37408 setFlag(Load);
37409 setFlag(GlobalSegment);
37410 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37411
37412 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37413 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY()
37414 {
37415 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY
37416
37417 // Typed buffer load 2 dwords with format conversion.
37418 void
37419 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
37420 {
37421 panicUnimplemented();
37422 }
37423
37424 void
37425 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::initiateAcc(
37426 GPUDynInstPtr gpuDynInst)
37427 {
37428 } // initiateAcc
37429
37430 void
37431 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XY::completeAcc(
37432 GPUDynInstPtr gpuDynInst)
37433 {
37434 }
37435
37436 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37437 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ(
37438 InFmt_MTBUF *iFmt)
37439 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyz")
37440 {
37441 setFlag(MemoryRef);
37442 setFlag(Load);
37443 setFlag(GlobalSegment);
37444 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37445
37446 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37447 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ()
37448 {
37449 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ
37450
37451 // Typed buffer load 3 dwords with format conversion.
37452 void
37453 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
37454 {
37455 panicUnimplemented();
37456 }
37457
37458 void
37459 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::initiateAcc(
37460 GPUDynInstPtr gpuDynInst)
37461 {
37462 } // initiateAcc
37463
37464 void
37465 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZ::completeAcc(
37466 GPUDynInstPtr gpuDynInst)
37467 {
37468 }
37469
37470 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37471 ::Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW(
37472 InFmt_MTBUF *iFmt)
37473 : Inst_MTBUF(iFmt, "tbuffer_load_format_d16_xyzw")
37474 {
37475 setFlag(MemoryRef);
37476 setFlag(Load);
37477 setFlag(GlobalSegment);
37478 } // Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37479
37480 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37481 ::~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW()
37482 {
37483 } // ~Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW
37484
37485 // Typed buffer load 4 dwords with format conversion.
37486 void
37487 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::execute(GPUDynInstPtr gpuDynInst)
37488 {
37489 panicUnimplemented();
37490 }
37491
37492 void
37493 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::initiateAcc(
37494 GPUDynInstPtr gpuDynInst)
37495 {
37496 } // initiateAcc
37497
37498 void
37499 Inst_MTBUF__TBUFFER_LOAD_FORMAT_D16_XYZW::completeAcc(
37500 GPUDynInstPtr gpuDynInst)
37501 {
37502 }
37503
37504 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37505 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X(InFmt_MTBUF *iFmt)
37506 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_x")
37507 {
37508 setFlag(MemoryRef);
37509 setFlag(Store);
37510 setFlag(GlobalSegment);
37511 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37512
37513 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37514 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X()
37515 {
37516 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X
37517
37518 // Typed buffer store 1 dword with format conversion.
37519 void
37520 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::execute(GPUDynInstPtr gpuDynInst)
37521 {
37522 panicUnimplemented();
37523 }
37524
37525 void
37526 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::initiateAcc(
37527 GPUDynInstPtr gpuDynInst)
37528 {
37529 } // initiateAcc
37530
37531 void
37532 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_X::completeAcc(
37533 GPUDynInstPtr gpuDynInst)
37534 {
37535 }
37536
37537 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37538 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY(InFmt_MTBUF *iFmt)
37539 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xy")
37540 {
37541 setFlag(MemoryRef);
37542 setFlag(Store);
37543 setFlag(GlobalSegment);
37544 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37545
37546 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37547 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY()
37548 {
37549 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY
37550
37551 // Typed buffer store 2 dwords with format conversion.
37552 void
37553 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::execute(GPUDynInstPtr gpuDynInst)
37554 {
37555 panicUnimplemented();
37556 }
37557
37558 void
37559 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::initiateAcc(
37560 GPUDynInstPtr gpuDynInst)
37561 {
37562 } // initiateAcc
37563
37564 void
37565 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XY::completeAcc(
37566 GPUDynInstPtr gpuDynInst)
37567 {
37568 }
37569
37570 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37571 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ(InFmt_MTBUF *iFmt)
37572 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyz")
37573 {
37574 setFlag(MemoryRef);
37575 setFlag(Store);
37576 setFlag(GlobalSegment);
37577 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37578
37579 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37580 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ()
37581 {
37582 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ
37583
37584 // Typed buffer store 3 dwords with format conversion.
37585 void
37586 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::execute(GPUDynInstPtr gpuDynInst)
37587 {
37588 panicUnimplemented();
37589 }
37590
37591 void
37592 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::initiateAcc(
37593 GPUDynInstPtr gpuDynInst)
37594 {
37595 } // initiateAcc
37596
37597 void
37598 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZ::completeAcc(
37599 GPUDynInstPtr gpuDynInst)
37600 {
37601 }
37602
37603 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37604 ::Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW(InFmt_MTBUF *iFmt)
37605 : Inst_MTBUF(iFmt, "tbuffer_store_format_d16_xyzw")
37606 {
37607 setFlag(MemoryRef);
37608 setFlag(Store);
37609 setFlag(GlobalSegment);
37610 } // Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37611
37612 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37613 ::~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW()
37614 {
37615 } // ~Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW
37616
37617 // Typed buffer store 4 dwords with format conversion.
37618 void
37619 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::execute(
37620 GPUDynInstPtr gpuDynInst)
37621 {
37622 panicUnimplemented();
37623 }
37624
37625 void
37626 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::initiateAcc(
37627 GPUDynInstPtr gpuDynInst)
37628 {
37629 } // initiateAcc
37630
37631 void
37632 Inst_MTBUF__TBUFFER_STORE_FORMAT_D16_XYZW::completeAcc(
37633 GPUDynInstPtr gpuDynInst)
37634 {
37635 }
37636
37637 Inst_MIMG__IMAGE_LOAD::Inst_MIMG__IMAGE_LOAD(InFmt_MIMG *iFmt)
37638 : Inst_MIMG(iFmt, "image_load")
37639 {
37640 setFlag(MemoryRef);
37641 setFlag(Load);
37642 setFlag(GlobalSegment);
37643 } // Inst_MIMG__IMAGE_LOAD
37644
37645 Inst_MIMG__IMAGE_LOAD::~Inst_MIMG__IMAGE_LOAD()
37646 {
37647 } // ~Inst_MIMG__IMAGE_LOAD
37648
37649 // Image memory load with format conversion specified
37650 void
37651 Inst_MIMG__IMAGE_LOAD::execute(GPUDynInstPtr gpuDynInst)
37652 {
37653 panicUnimplemented();
37654 }
37655
37656 void
37657 Inst_MIMG__IMAGE_LOAD::initiateAcc(GPUDynInstPtr gpuDynInst)
37658 {
37659 } // initiateAcc
37660
37661 void
37662 Inst_MIMG__IMAGE_LOAD::completeAcc(GPUDynInstPtr gpuDynInst)
37663 {
37664 }
37665
37666 Inst_MIMG__IMAGE_LOAD_MIP::Inst_MIMG__IMAGE_LOAD_MIP(InFmt_MIMG *iFmt)
37667 : Inst_MIMG(iFmt, "image_load_mip")
37668 {
37669 setFlag(MemoryRef);
37670 setFlag(Load);
37671 setFlag(GlobalSegment);
37672 } // Inst_MIMG__IMAGE_LOAD_MIP
37673
37674 Inst_MIMG__IMAGE_LOAD_MIP::~Inst_MIMG__IMAGE_LOAD_MIP()
37675 {
37676 } // ~Inst_MIMG__IMAGE_LOAD_MIP
37677
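    // Image memory load with user-supplied mip level.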
37678 void
37679 Inst_MIMG__IMAGE_LOAD_MIP::execute(GPUDynInstPtr gpuDynInst)
37680 {
37681 panicUnimplemented();
37682 }
37683
37684 void
37685 Inst_MIMG__IMAGE_LOAD_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
37686 {
37687 } // initiateAcc
37688
37689 void
37690 Inst_MIMG__IMAGE_LOAD_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
37691 {
37692 }
37693
37694 Inst_MIMG__IMAGE_LOAD_PCK::Inst_MIMG__IMAGE_LOAD_PCK(InFmt_MIMG *iFmt)
37695 : Inst_MIMG(iFmt, "image_load_pck")
37696 {
37697 setFlag(MemoryRef);
37698 setFlag(Load);
37699 setFlag(GlobalSegment);
37700 } // Inst_MIMG__IMAGE_LOAD_PCK
37701
37702 Inst_MIMG__IMAGE_LOAD_PCK::~Inst_MIMG__IMAGE_LOAD_PCK()
37703 {
37704 } // ~Inst_MIMG__IMAGE_LOAD_PCK
37705
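    // Image memory load with no format conversion.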
37706 void
37707 Inst_MIMG__IMAGE_LOAD_PCK::execute(GPUDynInstPtr gpuDynInst)
37708 {
37709 panicUnimplemented();
37710 }
37711
37712 void
37713 Inst_MIMG__IMAGE_LOAD_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37714 {
37715 } // initiateAcc
37716
37717 void
37718 Inst_MIMG__IMAGE_LOAD_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37719 {
37720 }
37721
37722 Inst_MIMG__IMAGE_LOAD_PCK_SGN::Inst_MIMG__IMAGE_LOAD_PCK_SGN(
37723 InFmt_MIMG *iFmt)
37724 : Inst_MIMG(iFmt, "image_load_pck_sgn")
37725 {
37726 setFlag(MemoryRef);
37727 setFlag(Load);
37728 setFlag(GlobalSegment);
37729 } // Inst_MIMG__IMAGE_LOAD_PCK_SGN
37730
37731 Inst_MIMG__IMAGE_LOAD_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_PCK_SGN()
37732 {
37733 } // ~Inst_MIMG__IMAGE_LOAD_PCK_SGN
37734
    // Image memory load with no format conversion and with sign extension.
37736 void
37737 Inst_MIMG__IMAGE_LOAD_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
37738 {
37739 panicUnimplemented();
37740 }
37741
37742 void
37743 Inst_MIMG__IMAGE_LOAD_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
37744 {
37745 } // initiateAcc
37746
37747 void
37748 Inst_MIMG__IMAGE_LOAD_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
37749 {
37750 }
37751
37752 Inst_MIMG__IMAGE_LOAD_MIP_PCK::Inst_MIMG__IMAGE_LOAD_MIP_PCK(
37753 InFmt_MIMG *iFmt)
37754 : Inst_MIMG(iFmt, "image_load_mip_pck")
37755 {
37756 setFlag(MemoryRef);
37757 setFlag(Load);
37758 setFlag(GlobalSegment);
37759 } // Inst_MIMG__IMAGE_LOAD_MIP_PCK
37760
37761 Inst_MIMG__IMAGE_LOAD_MIP_PCK::~Inst_MIMG__IMAGE_LOAD_MIP_PCK()
37762 {
37763 } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK
37764
37765 // Image memory load with user-supplied mip level, no format conversion
37766 void
37767 Inst_MIMG__IMAGE_LOAD_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
37768 {
37769 panicUnimplemented();
37770 }
37771
37772 void
37773 Inst_MIMG__IMAGE_LOAD_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37774 {
37775 } // initiateAcc
37776
37777 void
37778 Inst_MIMG__IMAGE_LOAD_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37779 {
37780 }
37781
37782 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN(
37783 InFmt_MIMG *iFmt)
37784 : Inst_MIMG(iFmt, "image_load_mip_pck_sgn")
37785 {
37786 setFlag(MemoryRef);
37787 setFlag(Load);
37788 setFlag(GlobalSegment);
37789 } // Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN
37790
37791 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN()
37792 {
37793 } // ~Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN
37794
37795         // Image memory load with user-supplied mip level, no format
              // conversion and with sign extension.
37796 void
37797 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::execute(GPUDynInstPtr gpuDynInst)
37798 {
37799 panicUnimplemented();
37800 }
37801
37802 void
37803 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::initiateAcc(GPUDynInstPtr gpuDynInst)
37804 {
37805 } // initiateAcc
37806
37807 void
37808 Inst_MIMG__IMAGE_LOAD_MIP_PCK_SGN::completeAcc(GPUDynInstPtr gpuDynInst)
37809 {
37810 }
37811
37812 Inst_MIMG__IMAGE_STORE::Inst_MIMG__IMAGE_STORE(InFmt_MIMG *iFmt)
37813 : Inst_MIMG(iFmt, "image_store")
37814 {
37815 setFlag(MemoryRef);
37816 setFlag(Store);
37817 setFlag(GlobalSegment);
37818 } // Inst_MIMG__IMAGE_STORE
37819
37820 Inst_MIMG__IMAGE_STORE::~Inst_MIMG__IMAGE_STORE()
37821 {
37822 } // ~Inst_MIMG__IMAGE_STORE
37823
37824 // Image memory store with format conversion specified
37825 void
37826 Inst_MIMG__IMAGE_STORE::execute(GPUDynInstPtr gpuDynInst)
37827 {
37828 panicUnimplemented();
37829 }
37830
37831 void
37832 Inst_MIMG__IMAGE_STORE::initiateAcc(GPUDynInstPtr gpuDynInst)
37833 {
37834 } // initiateAcc
37835
37836 void
37837 Inst_MIMG__IMAGE_STORE::completeAcc(GPUDynInstPtr gpuDynInst)
37838 {
37839 }
37840
37841 Inst_MIMG__IMAGE_STORE_MIP::Inst_MIMG__IMAGE_STORE_MIP(InFmt_MIMG *iFmt)
37842 : Inst_MIMG(iFmt, "image_store_mip")
37843 {
37844 setFlag(MemoryRef);
37845 setFlag(Store);
37846 setFlag(GlobalSegment);
37847 } // Inst_MIMG__IMAGE_STORE_MIP
37848
37849 Inst_MIMG__IMAGE_STORE_MIP::~Inst_MIMG__IMAGE_STORE_MIP()
37850 {
37851 } // ~Inst_MIMG__IMAGE_STORE_MIP
37852
37853 void
37854 Inst_MIMG__IMAGE_STORE_MIP::execute(GPUDynInstPtr gpuDynInst)
37855 {
37856 panicUnimplemented();
37857 }
37858
37859 void
37860 Inst_MIMG__IMAGE_STORE_MIP::initiateAcc(GPUDynInstPtr gpuDynInst)
37861 {
37862 } // initiateAcc
37863
37864 void
37865 Inst_MIMG__IMAGE_STORE_MIP::completeAcc(GPUDynInstPtr gpuDynInst)
37866 {
37867 }
37868
37869 Inst_MIMG__IMAGE_STORE_PCK::Inst_MIMG__IMAGE_STORE_PCK(InFmt_MIMG *iFmt)
37870 : Inst_MIMG(iFmt, "image_store_pck")
37871 {
37872 setFlag(MemoryRef);
37873 setFlag(Store);
37874 setFlag(GlobalSegment);
37875 } // Inst_MIMG__IMAGE_STORE_PCK
37876
37877 Inst_MIMG__IMAGE_STORE_PCK::~Inst_MIMG__IMAGE_STORE_PCK()
37878 {
37879 } // ~Inst_MIMG__IMAGE_STORE_PCK
37880
37881 // Image memory store of packed data without format conversion.
37882 void
37883 Inst_MIMG__IMAGE_STORE_PCK::execute(GPUDynInstPtr gpuDynInst)
37884 {
37885 panicUnimplemented();
37886 }
37887
37888 void
37889 Inst_MIMG__IMAGE_STORE_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37890 {
37891 } // initiateAcc
37892
37893 void
37894 Inst_MIMG__IMAGE_STORE_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37895 {
37896 }
37897
37898 Inst_MIMG__IMAGE_STORE_MIP_PCK::Inst_MIMG__IMAGE_STORE_MIP_PCK(
37899 InFmt_MIMG *iFmt)
37900 : Inst_MIMG(iFmt, "image_store_mip_pck")
37901 {
37902 setFlag(MemoryRef);
37903 setFlag(Store);
37904 setFlag(GlobalSegment);
37905 } // Inst_MIMG__IMAGE_STORE_MIP_PCK
37906
37907 Inst_MIMG__IMAGE_STORE_MIP_PCK::~Inst_MIMG__IMAGE_STORE_MIP_PCK()
37908 {
37909 } // ~Inst_MIMG__IMAGE_STORE_MIP_PCK
37910
37911 // Image memory store of packed data without format conversion
37912 void
37913 Inst_MIMG__IMAGE_STORE_MIP_PCK::execute(GPUDynInstPtr gpuDynInst)
37914 {
37915 panicUnimplemented();
37916 }
37917
37918 void
37919 Inst_MIMG__IMAGE_STORE_MIP_PCK::initiateAcc(GPUDynInstPtr gpuDynInst)
37920 {
37921 } // initiateAcc
37922
37923 void
37924 Inst_MIMG__IMAGE_STORE_MIP_PCK::completeAcc(GPUDynInstPtr gpuDynInst)
37925 {
37926 }
37927
37928 Inst_MIMG__IMAGE_GET_RESINFO::Inst_MIMG__IMAGE_GET_RESINFO(
37929 InFmt_MIMG *iFmt)
37930 : Inst_MIMG(iFmt, "image_get_resinfo")
37931 {
37932 setFlag(GlobalSegment);
37933 } // Inst_MIMG__IMAGE_GET_RESINFO
37934
37935 Inst_MIMG__IMAGE_GET_RESINFO::~Inst_MIMG__IMAGE_GET_RESINFO()
37936 {
37937 } // ~Inst_MIMG__IMAGE_GET_RESINFO
37938
37939 void
37940 Inst_MIMG__IMAGE_GET_RESINFO::execute(GPUDynInstPtr gpuDynInst)
37941 {
37942 panicUnimplemented();
37943 }
37944
37945 Inst_MIMG__IMAGE_ATOMIC_SWAP::Inst_MIMG__IMAGE_ATOMIC_SWAP(
37946 InFmt_MIMG *iFmt)
37947 : Inst_MIMG(iFmt, "image_atomic_swap")
37948 {
37949 setFlag(AtomicExch);
37950 if (instData.GLC) {
37951 setFlag(AtomicReturn);
37952 } else {
37953 setFlag(AtomicNoReturn);
37954 }
37955 setFlag(MemoryRef);
37956 setFlag(GlobalSegment);
37957 } // Inst_MIMG__IMAGE_ATOMIC_SWAP
37958
37959 Inst_MIMG__IMAGE_ATOMIC_SWAP::~Inst_MIMG__IMAGE_ATOMIC_SWAP()
37960 {
37961 } // ~Inst_MIMG__IMAGE_ATOMIC_SWAP
37962
37963 // tmp = MEM[ADDR];
37964 // MEM[ADDR] = DATA;
37965 // RETURN_DATA = tmp.
37966 void
37967 Inst_MIMG__IMAGE_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
37968 {
37969 panicUnimplemented();
37970 }
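
              // Every MIMG atomic below keys its return behavior off the
              // GLC bit in the same way: with GLC=1 the pre-operation
              // memory value is returned to the destination VGPRs (hence
              // AtomicReturn), and with GLC=0 it is discarded
              // (AtomicNoReturn).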
37971
37972 Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::Inst_MIMG__IMAGE_ATOMIC_CMPSWAP(
37973 InFmt_MIMG *iFmt)
37974 : Inst_MIMG(iFmt, "image_atomic_cmpswap")
37975 {
37976 setFlag(AtomicCAS);
37977 if (instData.GLC) {
37978 setFlag(AtomicReturn);
37979 } else {
37980 setFlag(AtomicNoReturn);
37981 }
37982 setFlag(MemoryRef);
37983 setFlag(GlobalSegment);
37984 } // Inst_MIMG__IMAGE_ATOMIC_CMPSWAP
37985
37986 Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP()
37987 {
37988 } // ~Inst_MIMG__IMAGE_ATOMIC_CMPSWAP
37989
37990 // tmp = MEM[ADDR];
37991 // src = DATA[0];
37992 // cmp = DATA[1];
37993 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
37994 // RETURN_DATA[0] = tmp.
37995 void
37996 Inst_MIMG__IMAGE_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
37997 {
37998 panicUnimplemented();
37999 }
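
              // DATA for image_atomic_cmpswap is a two-register pair:
              // DATA[0] holds the new value and DATA[1] the compare
              // value, so the per-texel update is a strong
              // compare-exchange. A minimal host-side sketch of the same
              // rule (hypothetical helper using <atomic>, not the gem5
              // timing path):
              //
              //     uint32_t cmpswap(std::atomic<uint32_t> &mem,
              //                      uint32_t src, uint32_t cmp)
              //     {
              //         uint32_t tmp = cmp;
              //         mem.compare_exchange_strong(tmp, src);
              //         return tmp; // pre-op value, i.e. RETURN_DATA[0]
              //     }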
38000
38001 Inst_MIMG__IMAGE_ATOMIC_ADD::Inst_MIMG__IMAGE_ATOMIC_ADD(InFmt_MIMG *iFmt)
38002 : Inst_MIMG(iFmt, "image_atomic_add")
38003 {
38004 setFlag(AtomicAdd);
38005 if (instData.GLC) {
38006 setFlag(AtomicReturn);
38007 } else {
38008 setFlag(AtomicNoReturn);
38009 }
38010 setFlag(MemoryRef);
38011 setFlag(GlobalSegment);
38012 } // Inst_MIMG__IMAGE_ATOMIC_ADD
38013
38014 Inst_MIMG__IMAGE_ATOMIC_ADD::~Inst_MIMG__IMAGE_ATOMIC_ADD()
38015 {
38016 } // ~Inst_MIMG__IMAGE_ATOMIC_ADD
38017
38018 // tmp = MEM[ADDR];
38019 // MEM[ADDR] += DATA;
38020 // RETURN_DATA = tmp.
38021 void
38022 Inst_MIMG__IMAGE_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
38023 {
38024 panicUnimplemented();
38025 }
38026
38027 Inst_MIMG__IMAGE_ATOMIC_SUB::Inst_MIMG__IMAGE_ATOMIC_SUB(InFmt_MIMG *iFmt)
38028 : Inst_MIMG(iFmt, "image_atomic_sub")
38029 {
38030 setFlag(AtomicSub);
38031 if (instData.GLC) {
38032 setFlag(AtomicReturn);
38033 } else {
38034 setFlag(AtomicNoReturn);
38035 }
38036 setFlag(MemoryRef);
38037 setFlag(GlobalSegment);
38038 } // Inst_MIMG__IMAGE_ATOMIC_SUB
38039
38040 Inst_MIMG__IMAGE_ATOMIC_SUB::~Inst_MIMG__IMAGE_ATOMIC_SUB()
38041 {
38042 } // ~Inst_MIMG__IMAGE_ATOMIC_SUB
38043
38044 // tmp = MEM[ADDR];
38045 // MEM[ADDR] -= DATA;
38046 // RETURN_DATA = tmp.
38047 void
38048 Inst_MIMG__IMAGE_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
38049 {
38050 panicUnimplemented();
38051 }
38052
38053 Inst_MIMG__IMAGE_ATOMIC_SMIN::Inst_MIMG__IMAGE_ATOMIC_SMIN(
38054 InFmt_MIMG *iFmt)
38055 : Inst_MIMG(iFmt, "image_atomic_smin")
38056 {
38057 setFlag(AtomicMin);
38058 if (instData.GLC) {
38059 setFlag(AtomicReturn);
38060 } else {
38061 setFlag(AtomicNoReturn);
38062 }
38063 setFlag(MemoryRef);
38064 setFlag(GlobalSegment);
38065 } // Inst_MIMG__IMAGE_ATOMIC_SMIN
38066
38067 Inst_MIMG__IMAGE_ATOMIC_SMIN::~Inst_MIMG__IMAGE_ATOMIC_SMIN()
38068 {
38069 } // ~Inst_MIMG__IMAGE_ATOMIC_SMIN
38070
38071 // tmp = MEM[ADDR];
38072 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
38073 // RETURN_DATA = tmp.
38074 void
38075 Inst_MIMG__IMAGE_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
38076 {
38077 panicUnimplemented();
38078 }
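
              // The s/u prefix on the image min/max atomics changes only
              // the comparison type, not the data movement: smin/smax
              // compare the texel and DATA as int32, umin/umax as
              // uint32. Illustrative (assumes <algorithm>):
              //
              //     int32_t  sres = std::min(int32_t(tmp), int32_t(data));
              //     uint32_t ures = std::min(tmp, data);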
38079
38080 Inst_MIMG__IMAGE_ATOMIC_UMIN::Inst_MIMG__IMAGE_ATOMIC_UMIN(
38081 InFmt_MIMG *iFmt)
38082 : Inst_MIMG(iFmt, "image_atomic_umin")
38083 {
38084 setFlag(AtomicMin);
38085 if (instData.GLC) {
38086 setFlag(AtomicReturn);
38087 } else {
38088 setFlag(AtomicNoReturn);
38089 }
38090 setFlag(MemoryRef);
38091 setFlag(GlobalSegment);
38092 } // Inst_MIMG__IMAGE_ATOMIC_UMIN
38093
38094 Inst_MIMG__IMAGE_ATOMIC_UMIN::~Inst_MIMG__IMAGE_ATOMIC_UMIN()
38095 {
38096 } // ~Inst_MIMG__IMAGE_ATOMIC_UMIN
38097
38098 // tmp = MEM[ADDR];
38099 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
38100 // RETURN_DATA = tmp.
38101 void
38102 Inst_MIMG__IMAGE_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
38103 {
38104 panicUnimplemented();
38105 }
38106
38107 Inst_MIMG__IMAGE_ATOMIC_SMAX::Inst_MIMG__IMAGE_ATOMIC_SMAX(
38108 InFmt_MIMG *iFmt)
38109 : Inst_MIMG(iFmt, "image_atomic_smax")
38110 {
38111 setFlag(AtomicMax);
38112 if (instData.GLC) {
38113 setFlag(AtomicReturn);
38114 } else {
38115 setFlag(AtomicNoReturn);
38116 }
38117 setFlag(MemoryRef);
38118 setFlag(GlobalSegment);
38119 } // Inst_MIMG__IMAGE_ATOMIC_SMAX
38120
38121 Inst_MIMG__IMAGE_ATOMIC_SMAX::~Inst_MIMG__IMAGE_ATOMIC_SMAX()
38122 {
38123 } // ~Inst_MIMG__IMAGE_ATOMIC_SMAX
38124
38125 // tmp = MEM[ADDR];
38126 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
38127 // RETURN_DATA = tmp.
38128 void
38129 Inst_MIMG__IMAGE_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
38130 {
38131 panicUnimplemented();
38132 }
38133
38134 Inst_MIMG__IMAGE_ATOMIC_UMAX::Inst_MIMG__IMAGE_ATOMIC_UMAX(
38135 InFmt_MIMG *iFmt)
38136 : Inst_MIMG(iFmt, "image_atomic_umax")
38137 {
38138 setFlag(AtomicMax);
38139 if (instData.GLC) {
38140 setFlag(AtomicReturn);
38141 } else {
38142 setFlag(AtomicNoReturn);
38143 }
38144 setFlag(MemoryRef);
38145 setFlag(GlobalSegment);
38146 } // Inst_MIMG__IMAGE_ATOMIC_UMAX
38147
38148 Inst_MIMG__IMAGE_ATOMIC_UMAX::~Inst_MIMG__IMAGE_ATOMIC_UMAX()
38149 {
38150 } // ~Inst_MIMG__IMAGE_ATOMIC_UMAX
38151
38152 // tmp = MEM[ADDR];
38153 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
38154 // RETURN_DATA = tmp.
38155 void
38156 Inst_MIMG__IMAGE_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
38157 {
38158 panicUnimplemented();
38159 }
38160
38161 Inst_MIMG__IMAGE_ATOMIC_AND::Inst_MIMG__IMAGE_ATOMIC_AND(InFmt_MIMG *iFmt)
38162 : Inst_MIMG(iFmt, "image_atomic_and")
38163 {
38164 setFlag(AtomicAnd);
38165 if (instData.GLC) {
38166 setFlag(AtomicReturn);
38167 } else {
38168 setFlag(AtomicNoReturn);
38169 }
38170 setFlag(MemoryRef);
38171 setFlag(GlobalSegment);
38172 } // Inst_MIMG__IMAGE_ATOMIC_AND
38173
38174 Inst_MIMG__IMAGE_ATOMIC_AND::~Inst_MIMG__IMAGE_ATOMIC_AND()
38175 {
38176 } // ~Inst_MIMG__IMAGE_ATOMIC_AND
38177
38178 // tmp = MEM[ADDR];
38179 // MEM[ADDR] &= DATA;
38180 // RETURN_DATA = tmp.
38181 void
38182 Inst_MIMG__IMAGE_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
38183 {
38184 panicUnimplemented();
38185 }
38186
38187 Inst_MIMG__IMAGE_ATOMIC_OR::Inst_MIMG__IMAGE_ATOMIC_OR(InFmt_MIMG *iFmt)
38188 : Inst_MIMG(iFmt, "image_atomic_or")
38189 {
38190 setFlag(AtomicOr);
38191 if (instData.GLC) {
38192 setFlag(AtomicReturn);
38193 } else {
38194 setFlag(AtomicNoReturn);
38195 }
38196 setFlag(MemoryRef);
38197 setFlag(GlobalSegment);
38198 } // Inst_MIMG__IMAGE_ATOMIC_OR
38199
38200 Inst_MIMG__IMAGE_ATOMIC_OR::~Inst_MIMG__IMAGE_ATOMIC_OR()
38201 {
38202 } // ~Inst_MIMG__IMAGE_ATOMIC_OR
38203
38204 // tmp = MEM[ADDR];
38205 // MEM[ADDR] |= DATA;
38206 // RETURN_DATA = tmp.
38207 void
38208 Inst_MIMG__IMAGE_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
38209 {
38210 panicUnimplemented();
38211 }
38212
38213 Inst_MIMG__IMAGE_ATOMIC_XOR::Inst_MIMG__IMAGE_ATOMIC_XOR(InFmt_MIMG *iFmt)
38214 : Inst_MIMG(iFmt, "image_atomic_xor")
38215 {
38216 setFlag(AtomicXor);
38217 if (instData.GLC) {
38218 setFlag(AtomicReturn);
38219 } else {
38220 setFlag(AtomicNoReturn);
38221 }
38222 setFlag(MemoryRef);
38223 setFlag(GlobalSegment);
38224 } // Inst_MIMG__IMAGE_ATOMIC_XOR
38225
38226 Inst_MIMG__IMAGE_ATOMIC_XOR::~Inst_MIMG__IMAGE_ATOMIC_XOR()
38227 {
38228 } // ~Inst_MIMG__IMAGE_ATOMIC_XOR
38229
38230 // tmp = MEM[ADDR];
38231 // MEM[ADDR] ^= DATA;
38232 // RETURN_DATA = tmp.
38233 void
38234 Inst_MIMG__IMAGE_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
38235 {
38236 panicUnimplemented();
38237 }
38238
38239 Inst_MIMG__IMAGE_ATOMIC_INC::Inst_MIMG__IMAGE_ATOMIC_INC(InFmt_MIMG *iFmt)
38240 : Inst_MIMG(iFmt, "image_atomic_inc")
38241 {
38242 setFlag(AtomicInc);
38243 if (instData.GLC) {
38244 setFlag(AtomicReturn);
38245 } else {
38246 setFlag(AtomicNoReturn);
38247 }
38248 setFlag(MemoryRef);
38249 setFlag(GlobalSegment);
38250 } // Inst_MIMG__IMAGE_ATOMIC_INC
38251
38252 Inst_MIMG__IMAGE_ATOMIC_INC::~Inst_MIMG__IMAGE_ATOMIC_INC()
38253 {
38254 } // ~Inst_MIMG__IMAGE_ATOMIC_INC
38255
38256 // tmp = MEM[ADDR];
38257 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
38258 // RETURN_DATA = tmp.
38259 void
38260 Inst_MIMG__IMAGE_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
38261 {
38262 panicUnimplemented();
38263 }
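
              // image_atomic_inc is a wrapping increment: DATA acts as
              // the wrap limit, not an addend. A scalar sketch of the
              // update rule above (illustrative only):
              //
              //     uint32_t wrappingInc(uint32_t tmp, uint32_t data)
              //     {
              //         return (tmp >= data) ? 0 : tmp + 1;
              //     }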
38264
38265 Inst_MIMG__IMAGE_ATOMIC_DEC::Inst_MIMG__IMAGE_ATOMIC_DEC(InFmt_MIMG *iFmt)
38266 : Inst_MIMG(iFmt, "image_atomic_dec")
38267 {
38268 setFlag(AtomicDec);
38269 if (instData.GLC) {
38270 setFlag(AtomicReturn);
38271 } else {
38272 setFlag(AtomicNoReturn);
38273 }
38274 setFlag(MemoryRef);
38275 setFlag(GlobalSegment);
38276 } // Inst_MIMG__IMAGE_ATOMIC_DEC
38277
38278 Inst_MIMG__IMAGE_ATOMIC_DEC::~Inst_MIMG__IMAGE_ATOMIC_DEC()
38279 {
38280 } // ~Inst_MIMG__IMAGE_ATOMIC_DEC
38281
38282 // tmp = MEM[ADDR];
38283 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
38284 // (unsigned compare); RETURN_DATA = tmp.
38285 void
38286 Inst_MIMG__IMAGE_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
38287 {
38288 panicUnimplemented();
38289 }
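
              // image_atomic_dec wraps the other way: it resets to DATA
              // when the texel is 0 or already above DATA. A scalar
              // sketch (illustrative only):
              //
              //     uint32_t wrappingDec(uint32_t tmp, uint32_t data)
              //     {
              //         return (tmp == 0 || tmp > data) ? data : tmp - 1;
              //     }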
38290
38291 Inst_MIMG__IMAGE_SAMPLE::Inst_MIMG__IMAGE_SAMPLE(InFmt_MIMG *iFmt)
38292 : Inst_MIMG(iFmt, "image_sample")
38293 {
38294 setFlag(GlobalSegment);
38295 } // Inst_MIMG__IMAGE_SAMPLE
38296
38297 Inst_MIMG__IMAGE_SAMPLE::~Inst_MIMG__IMAGE_SAMPLE()
38298 {
38299 } // ~Inst_MIMG__IMAGE_SAMPLE
38300
38301 void
38302 Inst_MIMG__IMAGE_SAMPLE::execute(GPUDynInstPtr gpuDynInst)
38303 {
38304 panicUnimplemented();
38305 }
38306
38307 Inst_MIMG__IMAGE_SAMPLE_CL::Inst_MIMG__IMAGE_SAMPLE_CL(InFmt_MIMG *iFmt)
38308 : Inst_MIMG(iFmt, "image_sample_cl")
38309 {
38310 setFlag(GlobalSegment);
38311 } // Inst_MIMG__IMAGE_SAMPLE_CL
38312
38313 Inst_MIMG__IMAGE_SAMPLE_CL::~Inst_MIMG__IMAGE_SAMPLE_CL()
38314 {
38315 } // ~Inst_MIMG__IMAGE_SAMPLE_CL
38316
38317 void
38318 Inst_MIMG__IMAGE_SAMPLE_CL::execute(GPUDynInstPtr gpuDynInst)
38319 {
38320 panicUnimplemented();
38321 }
38322
38323 Inst_MIMG__IMAGE_SAMPLE_D::Inst_MIMG__IMAGE_SAMPLE_D(InFmt_MIMG *iFmt)
38324 : Inst_MIMG(iFmt, "image_sample_d")
38325 {
38326 setFlag(GlobalSegment);
38327 } // Inst_MIMG__IMAGE_SAMPLE_D
38328
38329 Inst_MIMG__IMAGE_SAMPLE_D::~Inst_MIMG__IMAGE_SAMPLE_D()
38330 {
38331 } // ~Inst_MIMG__IMAGE_SAMPLE_D
38332
38333 void
38334 Inst_MIMG__IMAGE_SAMPLE_D::execute(GPUDynInstPtr gpuDynInst)
38335 {
38336 panicUnimplemented();
38337 }
38338
38339 Inst_MIMG__IMAGE_SAMPLE_D_CL::Inst_MIMG__IMAGE_SAMPLE_D_CL(
38340 InFmt_MIMG *iFmt)
38341 : Inst_MIMG(iFmt, "image_sample_d_cl")
38342 {
38343 setFlag(GlobalSegment);
38344 } // Inst_MIMG__IMAGE_SAMPLE_D_CL
38345
38346 Inst_MIMG__IMAGE_SAMPLE_D_CL::~Inst_MIMG__IMAGE_SAMPLE_D_CL()
38347 {
38348 } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL
38349
38350 void
38351 Inst_MIMG__IMAGE_SAMPLE_D_CL::execute(GPUDynInstPtr gpuDynInst)
38352 {
38353 panicUnimplemented();
38354 }
38355
38356 Inst_MIMG__IMAGE_SAMPLE_L::Inst_MIMG__IMAGE_SAMPLE_L(InFmt_MIMG *iFmt)
38357 : Inst_MIMG(iFmt, "image_sample_l")
38358 {
38359 setFlag(GlobalSegment);
38360 } // Inst_MIMG__IMAGE_SAMPLE_L
38361
38362 Inst_MIMG__IMAGE_SAMPLE_L::~Inst_MIMG__IMAGE_SAMPLE_L()
38363 {
38364 } // ~Inst_MIMG__IMAGE_SAMPLE_L
38365
38366 void
38367 Inst_MIMG__IMAGE_SAMPLE_L::execute(GPUDynInstPtr gpuDynInst)
38368 {
38369 panicUnimplemented();
38370 }
38371
38372 Inst_MIMG__IMAGE_SAMPLE_B::Inst_MIMG__IMAGE_SAMPLE_B(InFmt_MIMG *iFmt)
38373 : Inst_MIMG(iFmt, "image_sample_b")
38374 {
38375 setFlag(GlobalSegment);
38376 } // Inst_MIMG__IMAGE_SAMPLE_B
38377
38378 Inst_MIMG__IMAGE_SAMPLE_B::~Inst_MIMG__IMAGE_SAMPLE_B()
38379 {
38380 } // ~Inst_MIMG__IMAGE_SAMPLE_B
38381
38382 void
38383 Inst_MIMG__IMAGE_SAMPLE_B::execute(GPUDynInstPtr gpuDynInst)
38384 {
38385 panicUnimplemented();
38386 }
38387
38388 Inst_MIMG__IMAGE_SAMPLE_B_CL::Inst_MIMG__IMAGE_SAMPLE_B_CL(
38389 InFmt_MIMG *iFmt)
38390 : Inst_MIMG(iFmt, "image_sample_b_cl")
38391 {
38392 setFlag(GlobalSegment);
38393 } // Inst_MIMG__IMAGE_SAMPLE_B_CL
38394
38395 Inst_MIMG__IMAGE_SAMPLE_B_CL::~Inst_MIMG__IMAGE_SAMPLE_B_CL()
38396 {
38397 } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL
38398
38399 void
38400 Inst_MIMG__IMAGE_SAMPLE_B_CL::execute(GPUDynInstPtr gpuDynInst)
38401 {
38402 panicUnimplemented();
38403 }
38404
38405 Inst_MIMG__IMAGE_SAMPLE_LZ::Inst_MIMG__IMAGE_SAMPLE_LZ(InFmt_MIMG *iFmt)
38406 : Inst_MIMG(iFmt, "image_sample_lz")
38407 {
38408 setFlag(GlobalSegment);
38409 } // Inst_MIMG__IMAGE_SAMPLE_LZ
38410
38411 Inst_MIMG__IMAGE_SAMPLE_LZ::~Inst_MIMG__IMAGE_SAMPLE_LZ()
38412 {
38413 } // ~Inst_MIMG__IMAGE_SAMPLE_LZ
38414
38415 void
38416 Inst_MIMG__IMAGE_SAMPLE_LZ::execute(GPUDynInstPtr gpuDynInst)
38417 {
38418 panicUnimplemented();
38419 }
38420
38421 Inst_MIMG__IMAGE_SAMPLE_C::Inst_MIMG__IMAGE_SAMPLE_C(InFmt_MIMG *iFmt)
38422 : Inst_MIMG(iFmt, "image_sample_c")
38423 {
38424 setFlag(GlobalSegment);
38425 } // Inst_MIMG__IMAGE_SAMPLE_C
38426
38427 Inst_MIMG__IMAGE_SAMPLE_C::~Inst_MIMG__IMAGE_SAMPLE_C()
38428 {
38429 } // ~Inst_MIMG__IMAGE_SAMPLE_C
38430
38431 void
38432 Inst_MIMG__IMAGE_SAMPLE_C::execute(GPUDynInstPtr gpuDynInst)
38433 {
38434 panicUnimplemented();
38435 }
38436
38437 Inst_MIMG__IMAGE_SAMPLE_C_CL::Inst_MIMG__IMAGE_SAMPLE_C_CL(
38438 InFmt_MIMG *iFmt)
38439 : Inst_MIMG(iFmt, "image_sample_c_cl")
38440 {
38441 setFlag(GlobalSegment);
38442 } // Inst_MIMG__IMAGE_SAMPLE_C_CL
38443
38444 Inst_MIMG__IMAGE_SAMPLE_C_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CL()
38445 {
38446 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL
38447
38448 void
38449 Inst_MIMG__IMAGE_SAMPLE_C_CL::execute(GPUDynInstPtr gpuDynInst)
38450 {
38451 panicUnimplemented();
38452 }
38453
38454 Inst_MIMG__IMAGE_SAMPLE_C_D::Inst_MIMG__IMAGE_SAMPLE_C_D(InFmt_MIMG *iFmt)
38455 : Inst_MIMG(iFmt, "image_sample_c_d")
38456 {
38457 setFlag(GlobalSegment);
38458 } // Inst_MIMG__IMAGE_SAMPLE_C_D
38459
38460 Inst_MIMG__IMAGE_SAMPLE_C_D::~Inst_MIMG__IMAGE_SAMPLE_C_D()
38461 {
38462 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D
38463
38464 void
38465 Inst_MIMG__IMAGE_SAMPLE_C_D::execute(GPUDynInstPtr gpuDynInst)
38466 {
38467 panicUnimplemented();
38468 }
38469
38470 Inst_MIMG__IMAGE_SAMPLE_C_D_CL::Inst_MIMG__IMAGE_SAMPLE_C_D_CL(
38471 InFmt_MIMG *iFmt)
38472 : Inst_MIMG(iFmt, "image_sample_c_d_cl")
38473 {
38474 setFlag(GlobalSegment);
38475 } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL
38476
38477 Inst_MIMG__IMAGE_SAMPLE_C_D_CL::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL()
38478 {
38479 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL
38480
38481 void
38482 Inst_MIMG__IMAGE_SAMPLE_C_D_CL::execute(GPUDynInstPtr gpuDynInst)
38483 {
38484 panicUnimplemented();
38485 }
38486
38487 Inst_MIMG__IMAGE_SAMPLE_C_L::Inst_MIMG__IMAGE_SAMPLE_C_L(InFmt_MIMG *iFmt)
38488 : Inst_MIMG(iFmt, "image_sample_c_l")
38489 {
38490 setFlag(GlobalSegment);
38491 } // Inst_MIMG__IMAGE_SAMPLE_C_L
38492
38493 Inst_MIMG__IMAGE_SAMPLE_C_L::~Inst_MIMG__IMAGE_SAMPLE_C_L()
38494 {
38495 } // ~Inst_MIMG__IMAGE_SAMPLE_C_L
38496
38497 void
38498 Inst_MIMG__IMAGE_SAMPLE_C_L::execute(GPUDynInstPtr gpuDynInst)
38499 {
38500 panicUnimplemented();
38501 }
38502
38503 Inst_MIMG__IMAGE_SAMPLE_C_B::Inst_MIMG__IMAGE_SAMPLE_C_B(InFmt_MIMG *iFmt)
38504 : Inst_MIMG(iFmt, "image_sample_c_b")
38505 {
38506 setFlag(GlobalSegment);
38507 } // Inst_MIMG__IMAGE_SAMPLE_C_B
38508
38509 Inst_MIMG__IMAGE_SAMPLE_C_B::~Inst_MIMG__IMAGE_SAMPLE_C_B()
38510 {
38511 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B
38512
38513 void
38514 Inst_MIMG__IMAGE_SAMPLE_C_B::execute(GPUDynInstPtr gpuDynInst)
38515 {
38516 panicUnimplemented();
38517 }
38518
38519 Inst_MIMG__IMAGE_SAMPLE_C_B_CL::Inst_MIMG__IMAGE_SAMPLE_C_B_CL(
38520 InFmt_MIMG *iFmt)
38521 : Inst_MIMG(iFmt, "image_sample_c_b_cl")
38522 {
38523 setFlag(GlobalSegment);
38524 } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL
38525
38526 Inst_MIMG__IMAGE_SAMPLE_C_B_CL::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL()
38527 {
38528 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL
38529
38530 void
38531 Inst_MIMG__IMAGE_SAMPLE_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
38532 {
38533 panicUnimplemented();
38534 }
38535
38536 Inst_MIMG__IMAGE_SAMPLE_C_LZ::Inst_MIMG__IMAGE_SAMPLE_C_LZ(
38537 InFmt_MIMG *iFmt)
38538 : Inst_MIMG(iFmt, "image_sample_c_lz")
38539 {
38540 setFlag(GlobalSegment);
38541 } // Inst_MIMG__IMAGE_SAMPLE_C_LZ
38542
38543 Inst_MIMG__IMAGE_SAMPLE_C_LZ::~Inst_MIMG__IMAGE_SAMPLE_C_LZ()
38544 {
38545 } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ
38546
38547 void
38548 Inst_MIMG__IMAGE_SAMPLE_C_LZ::execute(GPUDynInstPtr gpuDynInst)
38549 {
38550 panicUnimplemented();
38551 }
38552
38553 Inst_MIMG__IMAGE_SAMPLE_O::Inst_MIMG__IMAGE_SAMPLE_O(InFmt_MIMG *iFmt)
38554 : Inst_MIMG(iFmt, "image_sample_o")
38555 {
38556 setFlag(GlobalSegment);
38557 } // Inst_MIMG__IMAGE_SAMPLE_O
38558
38559 Inst_MIMG__IMAGE_SAMPLE_O::~Inst_MIMG__IMAGE_SAMPLE_O()
38560 {
38561 } // ~Inst_MIMG__IMAGE_SAMPLE_O
38562
38563 void
38564 Inst_MIMG__IMAGE_SAMPLE_O::execute(GPUDynInstPtr gpuDynInst)
38565 {
38566 panicUnimplemented();
38567 }
38568
38569 Inst_MIMG__IMAGE_SAMPLE_CL_O::Inst_MIMG__IMAGE_SAMPLE_CL_O(
38570 InFmt_MIMG *iFmt)
38571 : Inst_MIMG(iFmt, "image_sample_cl_o")
38572 {
38573 setFlag(GlobalSegment);
38574 } // Inst_MIMG__IMAGE_SAMPLE_CL_O
38575
38576 Inst_MIMG__IMAGE_SAMPLE_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CL_O()
38577 {
38578 } // ~Inst_MIMG__IMAGE_SAMPLE_CL_O
38579
38580 void
38581 Inst_MIMG__IMAGE_SAMPLE_CL_O::execute(GPUDynInstPtr gpuDynInst)
38582 {
38583 panicUnimplemented();
38584 }
38585
38586 Inst_MIMG__IMAGE_SAMPLE_D_O::Inst_MIMG__IMAGE_SAMPLE_D_O(InFmt_MIMG *iFmt)
38587 : Inst_MIMG(iFmt, "image_sample_d_o")
38588 {
38589 setFlag(GlobalSegment);
38590 } // Inst_MIMG__IMAGE_SAMPLE_D_O
38591
38592 Inst_MIMG__IMAGE_SAMPLE_D_O::~Inst_MIMG__IMAGE_SAMPLE_D_O()
38593 {
38594 } // ~Inst_MIMG__IMAGE_SAMPLE_D_O
38595
38596 void
38597 Inst_MIMG__IMAGE_SAMPLE_D_O::execute(GPUDynInstPtr gpuDynInst)
38598 {
38599 panicUnimplemented();
38600 }
38601
38602 Inst_MIMG__IMAGE_SAMPLE_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_D_CL_O(
38603 InFmt_MIMG *iFmt)
38604 : Inst_MIMG(iFmt, "image_sample_d_cl_o")
38605 {
38606 setFlag(GlobalSegment);
38607 } // Inst_MIMG__IMAGE_SAMPLE_D_CL_O
38608
38609 Inst_MIMG__IMAGE_SAMPLE_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_D_CL_O()
38610 {
38611 } // ~Inst_MIMG__IMAGE_SAMPLE_D_CL_O
38612
38613 void
38614 Inst_MIMG__IMAGE_SAMPLE_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
38615 {
38616 panicUnimplemented();
38617 }
38618
38619 Inst_MIMG__IMAGE_SAMPLE_L_O::Inst_MIMG__IMAGE_SAMPLE_L_O(InFmt_MIMG *iFmt)
38620 : Inst_MIMG(iFmt, "image_sample_l_o")
38621 {
38622 setFlag(GlobalSegment);
38623 } // Inst_MIMG__IMAGE_SAMPLE_L_O
38624
38625 Inst_MIMG__IMAGE_SAMPLE_L_O::~Inst_MIMG__IMAGE_SAMPLE_L_O()
38626 {
38627 } // ~Inst_MIMG__IMAGE_SAMPLE_L_O
38628
38629 void
38630 Inst_MIMG__IMAGE_SAMPLE_L_O::execute(GPUDynInstPtr gpuDynInst)
38631 {
38632 panicUnimplemented();
38633 }
38634
38635 Inst_MIMG__IMAGE_SAMPLE_B_O::Inst_MIMG__IMAGE_SAMPLE_B_O(InFmt_MIMG *iFmt)
38636 : Inst_MIMG(iFmt, "image_sample_b_o")
38637 {
38638 setFlag(GlobalSegment);
38639 } // Inst_MIMG__IMAGE_SAMPLE_B_O
38640
38641 Inst_MIMG__IMAGE_SAMPLE_B_O::~Inst_MIMG__IMAGE_SAMPLE_B_O()
38642 {
38643 } // ~Inst_MIMG__IMAGE_SAMPLE_B_O
38644
38645 void
38646 Inst_MIMG__IMAGE_SAMPLE_B_O::execute(GPUDynInstPtr gpuDynInst)
38647 {
38648 panicUnimplemented();
38649 }
38650
38651 Inst_MIMG__IMAGE_SAMPLE_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_B_CL_O(
38652 InFmt_MIMG *iFmt)
38653 : Inst_MIMG(iFmt, "image_sample_b_cl_o")
38654 {
38655 setFlag(GlobalSegment);
38656 } // Inst_MIMG__IMAGE_SAMPLE_B_CL_O
38657
38658 Inst_MIMG__IMAGE_SAMPLE_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_B_CL_O()
38659 {
38660 } // ~Inst_MIMG__IMAGE_SAMPLE_B_CL_O
38661
38662 void
38663 Inst_MIMG__IMAGE_SAMPLE_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
38664 {
38665 panicUnimplemented();
38666 }
38667
38668 Inst_MIMG__IMAGE_SAMPLE_LZ_O::Inst_MIMG__IMAGE_SAMPLE_LZ_O(
38669 InFmt_MIMG *iFmt)
38670 : Inst_MIMG(iFmt, "image_sample_lz_o")
38671 {
38672 setFlag(GlobalSegment);
38673 } // Inst_MIMG__IMAGE_SAMPLE_LZ_O
38674
38675 Inst_MIMG__IMAGE_SAMPLE_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_LZ_O()
38676 {
38677 } // ~Inst_MIMG__IMAGE_SAMPLE_LZ_O
38678
38679 void
38680 Inst_MIMG__IMAGE_SAMPLE_LZ_O::execute(GPUDynInstPtr gpuDynInst)
38681 {
38682 panicUnimplemented();
38683 }
38684
38685 Inst_MIMG__IMAGE_SAMPLE_C_O::Inst_MIMG__IMAGE_SAMPLE_C_O(InFmt_MIMG *iFmt)
38686 : Inst_MIMG(iFmt, "image_sample_c_o")
38687 {
38688 setFlag(GlobalSegment);
38689 } // Inst_MIMG__IMAGE_SAMPLE_C_O
38690
38691 Inst_MIMG__IMAGE_SAMPLE_C_O::~Inst_MIMG__IMAGE_SAMPLE_C_O()
38692 {
38693 } // ~Inst_MIMG__IMAGE_SAMPLE_C_O
38694
38695 void
38696 Inst_MIMG__IMAGE_SAMPLE_C_O::execute(GPUDynInstPtr gpuDynInst)
38697 {
38698 panicUnimplemented();
38699 }
38700
38701 Inst_MIMG__IMAGE_SAMPLE_C_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CL_O(
38702 InFmt_MIMG *iFmt)
38703 : Inst_MIMG(iFmt, "image_sample_c_cl_o")
38704 {
38705 setFlag(GlobalSegment);
38706 } // Inst_MIMG__IMAGE_SAMPLE_C_CL_O
38707
38708 Inst_MIMG__IMAGE_SAMPLE_C_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CL_O()
38709 {
38710 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CL_O
38711
38712 void
38713 Inst_MIMG__IMAGE_SAMPLE_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
38714 {
38715 panicUnimplemented();
38716 }
38717
38718 Inst_MIMG__IMAGE_SAMPLE_C_D_O::Inst_MIMG__IMAGE_SAMPLE_C_D_O(
38719 InFmt_MIMG *iFmt)
38720 : Inst_MIMG(iFmt, "image_sample_c_d_o")
38721 {
38722 setFlag(GlobalSegment);
38723 } // Inst_MIMG__IMAGE_SAMPLE_C_D_O
38724
38725 Inst_MIMG__IMAGE_SAMPLE_C_D_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_O()
38726 {
38727 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_O
38728
38729 void
38730 Inst_MIMG__IMAGE_SAMPLE_C_D_O::execute(GPUDynInstPtr gpuDynInst)
38731 {
38732 panicUnimplemented();
38733 }
38734
38735 Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O(
38736 InFmt_MIMG *iFmt)
38737 : Inst_MIMG(iFmt, "image_sample_c_d_cl_o")
38738 {
38739 setFlag(GlobalSegment);
38740 } // Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
38741
38742 Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O()
38743 {
38744 } // ~Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O
38745
38746 void
38747 Inst_MIMG__IMAGE_SAMPLE_C_D_CL_O::execute(GPUDynInstPtr gpuDynInst)
38748 {
38749 panicUnimplemented();
38750 }
38751
38752 Inst_MIMG__IMAGE_SAMPLE_C_L_O::Inst_MIMG__IMAGE_SAMPLE_C_L_O(
38753 InFmt_MIMG *iFmt)
38754 : Inst_MIMG(iFmt, "image_sample_c_l_o")
38755 {
38756 setFlag(GlobalSegment);
38757 } // Inst_MIMG__IMAGE_SAMPLE_C_L_O
38758
38759 Inst_MIMG__IMAGE_SAMPLE_C_L_O::~Inst_MIMG__IMAGE_SAMPLE_C_L_O()
38760 {
38761 } // ~Inst_MIMG__IMAGE_SAMPLE_C_L_O
38762
38763 void
38764 Inst_MIMG__IMAGE_SAMPLE_C_L_O::execute(GPUDynInstPtr gpuDynInst)
38765 {
38766 panicUnimplemented();
38767 }
38768
38769 Inst_MIMG__IMAGE_SAMPLE_C_B_O::Inst_MIMG__IMAGE_SAMPLE_C_B_O(
38770 InFmt_MIMG *iFmt)
38771 : Inst_MIMG(iFmt, "image_sample_c_b_o")
38772 {
38773 setFlag(GlobalSegment);
38774 } // Inst_MIMG__IMAGE_SAMPLE_C_B_O
38775
38776 Inst_MIMG__IMAGE_SAMPLE_C_B_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_O()
38777 {
38778 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_O
38779
38780 void
38781 Inst_MIMG__IMAGE_SAMPLE_C_B_O::execute(GPUDynInstPtr gpuDynInst)
38782 {
38783 panicUnimplemented();
38784 }
38785
38786 Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O(
38787 InFmt_MIMG *iFmt)
38788 : Inst_MIMG(iFmt, "image_sample_c_b_cl_o")
38789 {
38790 setFlag(GlobalSegment);
38791 } // Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
38792
38793 Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O()
38794 {
38795 } // ~Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O
38796
38797 void
38798 Inst_MIMG__IMAGE_SAMPLE_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
38799 {
38800 panicUnimplemented();
38801 }
38802
38803 Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::Inst_MIMG__IMAGE_SAMPLE_C_LZ_O(
38804 InFmt_MIMG *iFmt)
38805 : Inst_MIMG(iFmt, "image_sample_c_lz_o")
38806 {
38807 setFlag(GlobalSegment);
38808 } // Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
38809
38810 Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O()
38811 {
38812 } // ~Inst_MIMG__IMAGE_SAMPLE_C_LZ_O
38813
38814 void
38815 Inst_MIMG__IMAGE_SAMPLE_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
38816 {
38817 panicUnimplemented();
38818 }
38819
38820 Inst_MIMG__IMAGE_GATHER4::Inst_MIMG__IMAGE_GATHER4(InFmt_MIMG *iFmt)
38821 : Inst_MIMG(iFmt, "image_gather4")
38822 {
38823 setFlag(GlobalSegment);
38824 } // Inst_MIMG__IMAGE_GATHER4
38825
38826 Inst_MIMG__IMAGE_GATHER4::~Inst_MIMG__IMAGE_GATHER4()
38827 {
38828 } // ~Inst_MIMG__IMAGE_GATHER4
38829
38830 void
38831 Inst_MIMG__IMAGE_GATHER4::execute(GPUDynInstPtr gpuDynInst)
38832 {
38833 panicUnimplemented();
38834 }
38835
38836 Inst_MIMG__IMAGE_GATHER4_CL::Inst_MIMG__IMAGE_GATHER4_CL(InFmt_MIMG *iFmt)
38837 : Inst_MIMG(iFmt, "image_gather4_cl")
38838 {
38839 setFlag(GlobalSegment);
38840 } // Inst_MIMG__IMAGE_GATHER4_CL
38841
38842 Inst_MIMG__IMAGE_GATHER4_CL::~Inst_MIMG__IMAGE_GATHER4_CL()
38843 {
38844 } // ~Inst_MIMG__IMAGE_GATHER4_CL
38845
38846 void
38847 Inst_MIMG__IMAGE_GATHER4_CL::execute(GPUDynInstPtr gpuDynInst)
38848 {
38849 panicUnimplemented();
38850 }
38851
38852 Inst_MIMG__IMAGE_GATHER4_L::Inst_MIMG__IMAGE_GATHER4_L(InFmt_MIMG *iFmt)
38853 : Inst_MIMG(iFmt, "image_gather4_l")
38854 {
38855 setFlag(GlobalSegment);
38856 } // Inst_MIMG__IMAGE_GATHER4_L
38857
38858 Inst_MIMG__IMAGE_GATHER4_L::~Inst_MIMG__IMAGE_GATHER4_L()
38859 {
38860 } // ~Inst_MIMG__IMAGE_GATHER4_L
38861
38862 void
38863 Inst_MIMG__IMAGE_GATHER4_L::execute(GPUDynInstPtr gpuDynInst)
38864 {
38865 panicUnimplemented();
38866 }
38867
38868 Inst_MIMG__IMAGE_GATHER4_B::Inst_MIMG__IMAGE_GATHER4_B(InFmt_MIMG *iFmt)
38869 : Inst_MIMG(iFmt, "image_gather4_b")
38870 {
38871 setFlag(GlobalSegment);
38872 } // Inst_MIMG__IMAGE_GATHER4_B
38873
38874 Inst_MIMG__IMAGE_GATHER4_B::~Inst_MIMG__IMAGE_GATHER4_B()
38875 {
38876 } // ~Inst_MIMG__IMAGE_GATHER4_B
38877
38878 void
38879 Inst_MIMG__IMAGE_GATHER4_B::execute(GPUDynInstPtr gpuDynInst)
38880 {
38881 panicUnimplemented();
38882 }
38883
38884 Inst_MIMG__IMAGE_GATHER4_B_CL::Inst_MIMG__IMAGE_GATHER4_B_CL(
38885 InFmt_MIMG *iFmt)
38886 : Inst_MIMG(iFmt, "image_gather4_b_cl")
38887 {
38888 setFlag(GlobalSegment);
38889 } // Inst_MIMG__IMAGE_GATHER4_B_CL
38890
38891 Inst_MIMG__IMAGE_GATHER4_B_CL::~Inst_MIMG__IMAGE_GATHER4_B_CL()
38892 {
38893 } // ~Inst_MIMG__IMAGE_GATHER4_B_CL
38894
38895 void
38896 Inst_MIMG__IMAGE_GATHER4_B_CL::execute(GPUDynInstPtr gpuDynInst)
38897 {
38898 panicUnimplemented();
38899 }
38900
38901 Inst_MIMG__IMAGE_GATHER4_LZ::Inst_MIMG__IMAGE_GATHER4_LZ(InFmt_MIMG *iFmt)
38902 : Inst_MIMG(iFmt, "image_gather4_lz")
38903 {
38904 setFlag(GlobalSegment);
38905 } // Inst_MIMG__IMAGE_GATHER4_LZ
38906
38907 Inst_MIMG__IMAGE_GATHER4_LZ::~Inst_MIMG__IMAGE_GATHER4_LZ()
38908 {
38909 } // ~Inst_MIMG__IMAGE_GATHER4_LZ
38910
38911 void
38912 Inst_MIMG__IMAGE_GATHER4_LZ::execute(GPUDynInstPtr gpuDynInst)
38913 {
38914 panicUnimplemented();
38915 }
38916
38917 Inst_MIMG__IMAGE_GATHER4_C::Inst_MIMG__IMAGE_GATHER4_C(InFmt_MIMG *iFmt)
38918 : Inst_MIMG(iFmt, "image_gather4_c")
38919 {
38920 setFlag(GlobalSegment);
38921 } // Inst_MIMG__IMAGE_GATHER4_C
38922
38923 Inst_MIMG__IMAGE_GATHER4_C::~Inst_MIMG__IMAGE_GATHER4_C()
38924 {
38925 } // ~Inst_MIMG__IMAGE_GATHER4_C
38926
38927 void
38928 Inst_MIMG__IMAGE_GATHER4_C::execute(GPUDynInstPtr gpuDynInst)
38929 {
38930 panicUnimplemented();
38931 }
38932
38933 Inst_MIMG__IMAGE_GATHER4_C_CL::Inst_MIMG__IMAGE_GATHER4_C_CL(
38934 InFmt_MIMG *iFmt)
38935 : Inst_MIMG(iFmt, "image_gather4_c_cl")
38936 {
38937 setFlag(GlobalSegment);
38938 } // Inst_MIMG__IMAGE_GATHER4_C_CL
38939
38940 Inst_MIMG__IMAGE_GATHER4_C_CL::~Inst_MIMG__IMAGE_GATHER4_C_CL()
38941 {
38942 } // ~Inst_MIMG__IMAGE_GATHER4_C_CL
38943
38944 void
38945 Inst_MIMG__IMAGE_GATHER4_C_CL::execute(GPUDynInstPtr gpuDynInst)
38946 {
38947 panicUnimplemented();
38948 }
38949
38950 Inst_MIMG__IMAGE_GATHER4_C_L::Inst_MIMG__IMAGE_GATHER4_C_L(
38951 InFmt_MIMG *iFmt)
38952 : Inst_MIMG(iFmt, "image_gather4_c_l")
38953 {
38954 setFlag(GlobalSegment);
38955 } // Inst_MIMG__IMAGE_GATHER4_C_L
38956
38957 Inst_MIMG__IMAGE_GATHER4_C_L::~Inst_MIMG__IMAGE_GATHER4_C_L()
38958 {
38959 } // ~Inst_MIMG__IMAGE_GATHER4_C_L
38960
38961 void
38962 Inst_MIMG__IMAGE_GATHER4_C_L::execute(GPUDynInstPtr gpuDynInst)
38963 {
38964 panicUnimplemented();
38965 }
38966
38967 Inst_MIMG__IMAGE_GATHER4_C_B::Inst_MIMG__IMAGE_GATHER4_C_B(
38968 InFmt_MIMG *iFmt)
38969 : Inst_MIMG(iFmt, "image_gather4_c_b")
38970 {
38971 setFlag(GlobalSegment);
38972 } // Inst_MIMG__IMAGE_GATHER4_C_B
38973
38974 Inst_MIMG__IMAGE_GATHER4_C_B::~Inst_MIMG__IMAGE_GATHER4_C_B()
38975 {
38976 } // ~Inst_MIMG__IMAGE_GATHER4_C_B
38977
38978 void
38979 Inst_MIMG__IMAGE_GATHER4_C_B::execute(GPUDynInstPtr gpuDynInst)
38980 {
38981 panicUnimplemented();
38982 }
38983
38984 Inst_MIMG__IMAGE_GATHER4_C_B_CL::Inst_MIMG__IMAGE_GATHER4_C_B_CL(
38985 InFmt_MIMG *iFmt)
38986 : Inst_MIMG(iFmt, "image_gather4_c_b_cl")
38987 {
38988 setFlag(GlobalSegment);
38989 } // Inst_MIMG__IMAGE_GATHER4_C_B_CL
38990
38991 Inst_MIMG__IMAGE_GATHER4_C_B_CL::~Inst_MIMG__IMAGE_GATHER4_C_B_CL()
38992 {
38993 } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL
38994
38995 void
38996 Inst_MIMG__IMAGE_GATHER4_C_B_CL::execute(GPUDynInstPtr gpuDynInst)
38997 {
38998 panicUnimplemented();
38999 }
39000
39001 Inst_MIMG__IMAGE_GATHER4_C_LZ::Inst_MIMG__IMAGE_GATHER4_C_LZ(
39002 InFmt_MIMG *iFmt)
39003 : Inst_MIMG(iFmt, "image_gather4_c_lz")
39004 {
39005 setFlag(GlobalSegment);
39006 } // Inst_MIMG__IMAGE_GATHER4_C_LZ
39007
39008 Inst_MIMG__IMAGE_GATHER4_C_LZ::~Inst_MIMG__IMAGE_GATHER4_C_LZ()
39009 {
39010 } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ
39011
39012 void
39013 Inst_MIMG__IMAGE_GATHER4_C_LZ::execute(GPUDynInstPtr gpuDynInst)
39014 {
39015 panicUnimplemented();
39016 }
39017
39018 Inst_MIMG__IMAGE_GATHER4_O::Inst_MIMG__IMAGE_GATHER4_O(InFmt_MIMG *iFmt)
39019 : Inst_MIMG(iFmt, "image_gather4_o")
39020 {
39021 setFlag(GlobalSegment);
39022 } // Inst_MIMG__IMAGE_GATHER4_O
39023
39024 Inst_MIMG__IMAGE_GATHER4_O::~Inst_MIMG__IMAGE_GATHER4_O()
39025 {
39026 } // ~Inst_MIMG__IMAGE_GATHER4_O
39027
39028 void
39029 Inst_MIMG__IMAGE_GATHER4_O::execute(GPUDynInstPtr gpuDynInst)
39030 {
39031 panicUnimplemented();
39032 }
39033
39034 Inst_MIMG__IMAGE_GATHER4_CL_O::Inst_MIMG__IMAGE_GATHER4_CL_O(
39035 InFmt_MIMG *iFmt)
39036 : Inst_MIMG(iFmt, "image_gather4_cl_o")
39037 {
39038 setFlag(GlobalSegment);
39039 } // Inst_MIMG__IMAGE_GATHER4_CL_O
39040
39041 Inst_MIMG__IMAGE_GATHER4_CL_O::~Inst_MIMG__IMAGE_GATHER4_CL_O()
39042 {
39043 } // ~Inst_MIMG__IMAGE_GATHER4_CL_O
39044
39045 void
39046 Inst_MIMG__IMAGE_GATHER4_CL_O::execute(GPUDynInstPtr gpuDynInst)
39047 {
39048 panicUnimplemented();
39049 }
39050
39051 Inst_MIMG__IMAGE_GATHER4_L_O::Inst_MIMG__IMAGE_GATHER4_L_O(
39052 InFmt_MIMG *iFmt)
39053 : Inst_MIMG(iFmt, "image_gather4_l_o")
39054 {
39055 setFlag(GlobalSegment);
39056 } // Inst_MIMG__IMAGE_GATHER4_L_O
39057
39058 Inst_MIMG__IMAGE_GATHER4_L_O::~Inst_MIMG__IMAGE_GATHER4_L_O()
39059 {
39060 } // ~Inst_MIMG__IMAGE_GATHER4_L_O
39061
39062 void
39063 Inst_MIMG__IMAGE_GATHER4_L_O::execute(GPUDynInstPtr gpuDynInst)
39064 {
39065 panicUnimplemented();
39066 }
39067
39068 Inst_MIMG__IMAGE_GATHER4_B_O::Inst_MIMG__IMAGE_GATHER4_B_O(
39069 InFmt_MIMG *iFmt)
39070 : Inst_MIMG(iFmt, "image_gather4_b_o")
39071 {
39072 setFlag(GlobalSegment);
39073 } // Inst_MIMG__IMAGE_GATHER4_B_O
39074
39075 Inst_MIMG__IMAGE_GATHER4_B_O::~Inst_MIMG__IMAGE_GATHER4_B_O()
39076 {
39077 } // ~Inst_MIMG__IMAGE_GATHER4_B_O
39078
39079 void
39080 Inst_MIMG__IMAGE_GATHER4_B_O::execute(GPUDynInstPtr gpuDynInst)
39081 {
39082 panicUnimplemented();
39083 }
39084
39085 Inst_MIMG__IMAGE_GATHER4_B_CL_O::Inst_MIMG__IMAGE_GATHER4_B_CL_O(
39086 InFmt_MIMG *iFmt)
39087 : Inst_MIMG(iFmt, "image_gather4_b_cl_o")
39088 {
39089 setFlag(GlobalSegment);
39090 } // Inst_MIMG__IMAGE_GATHER4_B_CL_O
39091
39092 Inst_MIMG__IMAGE_GATHER4_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_B_CL_O()
39093 {
39094 } // ~Inst_MIMG__IMAGE_GATHER4_B_CL_O
39095
39096 void
39097 Inst_MIMG__IMAGE_GATHER4_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
39098 {
39099 panicUnimplemented();
39100 }
39101
39102 Inst_MIMG__IMAGE_GATHER4_LZ_O::Inst_MIMG__IMAGE_GATHER4_LZ_O(
39103 InFmt_MIMG *iFmt)
39104 : Inst_MIMG(iFmt, "image_gather4_lz_o")
39105 {
39106 setFlag(GlobalSegment);
39107 } // Inst_MIMG__IMAGE_GATHER4_LZ_O
39108
39109 Inst_MIMG__IMAGE_GATHER4_LZ_O::~Inst_MIMG__IMAGE_GATHER4_LZ_O()
39110 {
39111 } // ~Inst_MIMG__IMAGE_GATHER4_LZ_O
39112
39113 void
39114 Inst_MIMG__IMAGE_GATHER4_LZ_O::execute(GPUDynInstPtr gpuDynInst)
39115 {
39116 panicUnimplemented();
39117 }
39118
39119 Inst_MIMG__IMAGE_GATHER4_C_O::Inst_MIMG__IMAGE_GATHER4_C_O(
39120 InFmt_MIMG *iFmt)
39121 : Inst_MIMG(iFmt, "image_gather4_c_o")
39122 {
39123 setFlag(GlobalSegment);
39124 } // Inst_MIMG__IMAGE_GATHER4_C_O
39125
39126 Inst_MIMG__IMAGE_GATHER4_C_O::~Inst_MIMG__IMAGE_GATHER4_C_O()
39127 {
39128 } // ~Inst_MIMG__IMAGE_GATHER4_C_O
39129
39130 void
39131 Inst_MIMG__IMAGE_GATHER4_C_O::execute(GPUDynInstPtr gpuDynInst)
39132 {
39133 panicUnimplemented();
39134 }
39135
39136 Inst_MIMG__IMAGE_GATHER4_C_CL_O::Inst_MIMG__IMAGE_GATHER4_C_CL_O(
39137 InFmt_MIMG *iFmt)
39138 : Inst_MIMG(iFmt, "image_gather4_c_cl_o")
39139 {
39140 setFlag(GlobalSegment);
39141 } // Inst_MIMG__IMAGE_GATHER4_C_CL_O
39142
39143 Inst_MIMG__IMAGE_GATHER4_C_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_CL_O()
39144 {
39145 } // ~Inst_MIMG__IMAGE_GATHER4_C_CL_O
39146
39147 void
39148 Inst_MIMG__IMAGE_GATHER4_C_CL_O::execute(GPUDynInstPtr gpuDynInst)
39149 {
39150 panicUnimplemented();
39151 }
39152
39153 Inst_MIMG__IMAGE_GATHER4_C_L_O::Inst_MIMG__IMAGE_GATHER4_C_L_O(
39154 InFmt_MIMG *iFmt)
39155 : Inst_MIMG(iFmt, "image_gather4_c_l_o")
39156 {
39157 setFlag(GlobalSegment);
39158 } // Inst_MIMG__IMAGE_GATHER4_C_L_O
39159
39160 Inst_MIMG__IMAGE_GATHER4_C_L_O::~Inst_MIMG__IMAGE_GATHER4_C_L_O()
39161 {
39162 } // ~Inst_MIMG__IMAGE_GATHER4_C_L_O
39163
39164 void
39165 Inst_MIMG__IMAGE_GATHER4_C_L_O::execute(GPUDynInstPtr gpuDynInst)
39166 {
39167 panicUnimplemented();
39168 }
39169
39170 Inst_MIMG__IMAGE_GATHER4_C_B_O::Inst_MIMG__IMAGE_GATHER4_C_B_O(
39171 InFmt_MIMG *iFmt)
39172 : Inst_MIMG(iFmt, "image_gather4_c_b_o")
39173 {
39174 setFlag(GlobalSegment);
39175 } // Inst_MIMG__IMAGE_GATHER4_C_B_O
39176
39177 Inst_MIMG__IMAGE_GATHER4_C_B_O::~Inst_MIMG__IMAGE_GATHER4_C_B_O()
39178 {
39179 } // ~Inst_MIMG__IMAGE_GATHER4_C_B_O
39180
39181 void
39182 Inst_MIMG__IMAGE_GATHER4_C_B_O::execute(GPUDynInstPtr gpuDynInst)
39183 {
39184 panicUnimplemented();
39185 }
39186
39187 Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::Inst_MIMG__IMAGE_GATHER4_C_B_CL_O(
39188 InFmt_MIMG *iFmt)
39189 : Inst_MIMG(iFmt, "image_gather4_c_b_cl_o")
39190 {
39191 setFlag(GlobalSegment);
39192 } // Inst_MIMG__IMAGE_GATHER4_C_B_CL_O
39193
39194 Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O()
39195 {
39196 } // ~Inst_MIMG__IMAGE_GATHER4_C_B_CL_O
39197
39198 void
39199 Inst_MIMG__IMAGE_GATHER4_C_B_CL_O::execute(GPUDynInstPtr gpuDynInst)
39200 {
39201 panicUnimplemented();
39202 }
39203
39204 Inst_MIMG__IMAGE_GATHER4_C_LZ_O::Inst_MIMG__IMAGE_GATHER4_C_LZ_O(
39205 InFmt_MIMG *iFmt)
39206 : Inst_MIMG(iFmt, "image_gather4_c_lz_o")
39207 {
39208 setFlag(GlobalSegment);
39209 } // Inst_MIMG__IMAGE_GATHER4_C_LZ_O
39210
39211 Inst_MIMG__IMAGE_GATHER4_C_LZ_O::~Inst_MIMG__IMAGE_GATHER4_C_LZ_O()
39212 {
39213 } // ~Inst_MIMG__IMAGE_GATHER4_C_LZ_O
39214
39215 void
39216 Inst_MIMG__IMAGE_GATHER4_C_LZ_O::execute(GPUDynInstPtr gpuDynInst)
39217 {
39218 panicUnimplemented();
39219 }
39220
39221 Inst_MIMG__IMAGE_GET_LOD::Inst_MIMG__IMAGE_GET_LOD(InFmt_MIMG *iFmt)
39222 : Inst_MIMG(iFmt, "image_get_lod")
39223 {
39224 setFlag(GlobalSegment);
39225 } // Inst_MIMG__IMAGE_GET_LOD
39226
39227 Inst_MIMG__IMAGE_GET_LOD::~Inst_MIMG__IMAGE_GET_LOD()
39228 {
39229 } // ~Inst_MIMG__IMAGE_GET_LOD
39230
39231 void
39232 Inst_MIMG__IMAGE_GET_LOD::execute(GPUDynInstPtr gpuDynInst)
39233 {
39234 panicUnimplemented();
39235 }
39236
39237 Inst_MIMG__IMAGE_SAMPLE_CD::Inst_MIMG__IMAGE_SAMPLE_CD(InFmt_MIMG *iFmt)
39238 : Inst_MIMG(iFmt, "image_sample_cd")
39239 {
39240 setFlag(GlobalSegment);
39241 } // Inst_MIMG__IMAGE_SAMPLE_CD
39242
39243 Inst_MIMG__IMAGE_SAMPLE_CD::~Inst_MIMG__IMAGE_SAMPLE_CD()
39244 {
39245 } // ~Inst_MIMG__IMAGE_SAMPLE_CD
39246
39247 void
39248 Inst_MIMG__IMAGE_SAMPLE_CD::execute(GPUDynInstPtr gpuDynInst)
39249 {
39250 panicUnimplemented();
39251 }
39252
39253 Inst_MIMG__IMAGE_SAMPLE_CD_CL::Inst_MIMG__IMAGE_SAMPLE_CD_CL(
39254 InFmt_MIMG *iFmt)
39255 : Inst_MIMG(iFmt, "image_sample_cd_cl")
39256 {
39257 setFlag(GlobalSegment);
39258 } // Inst_MIMG__IMAGE_SAMPLE_CD_CL
39259
39260 Inst_MIMG__IMAGE_SAMPLE_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_CD_CL()
39261 {
39262 } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL
39263
39264 void
39265 Inst_MIMG__IMAGE_SAMPLE_CD_CL::execute(GPUDynInstPtr gpuDynInst)
39266 {
39267 panicUnimplemented();
39268 }
39269
39270 Inst_MIMG__IMAGE_SAMPLE_C_CD::Inst_MIMG__IMAGE_SAMPLE_C_CD(
39271 InFmt_MIMG *iFmt)
39272 : Inst_MIMG(iFmt, "image_sample_c_cd")
39273 {
39274 setFlag(GlobalSegment);
39275 } // Inst_MIMG__IMAGE_SAMPLE_C_CD
39276
39277 Inst_MIMG__IMAGE_SAMPLE_C_CD::~Inst_MIMG__IMAGE_SAMPLE_C_CD()
39278 {
39279 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD
39280
39281 void
39282 Inst_MIMG__IMAGE_SAMPLE_C_CD::execute(GPUDynInstPtr gpuDynInst)
39283 {
39284 panicUnimplemented();
39285 }
39286
39287 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL(
39288 InFmt_MIMG *iFmt)
39289 : Inst_MIMG(iFmt, "image_sample_c_cd_cl")
39290 {
39291 setFlag(GlobalSegment);
39292 } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL
39293
39294 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL()
39295 {
39296 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL
39297
39298 void
39299 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL::execute(GPUDynInstPtr gpuDynInst)
39300 {
39301 panicUnimplemented();
39302 }
39303
39304 Inst_MIMG__IMAGE_SAMPLE_CD_O::Inst_MIMG__IMAGE_SAMPLE_CD_O(
39305 InFmt_MIMG *iFmt)
39306 : Inst_MIMG(iFmt, "image_sample_cd_o")
39307 {
39308 setFlag(GlobalSegment);
39309 } // Inst_MIMG__IMAGE_SAMPLE_CD_O
39310
39311 Inst_MIMG__IMAGE_SAMPLE_CD_O::~Inst_MIMG__IMAGE_SAMPLE_CD_O()
39312 {
39313 } // ~Inst_MIMG__IMAGE_SAMPLE_CD_O
39314
39315 void
39316 Inst_MIMG__IMAGE_SAMPLE_CD_O::execute(GPUDynInstPtr gpuDynInst)
39317 {
39318 panicUnimplemented();
39319 }
39320
39321 Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_CD_CL_O(
39322 InFmt_MIMG *iFmt)
39323 : Inst_MIMG(iFmt, "image_sample_cd_cl_o")
39324 {
39325 setFlag(GlobalSegment);
39326 } // Inst_MIMG__IMAGE_SAMPLE_CD_CL_O
39327
39328 Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O()
39329 {
39330 } // ~Inst_MIMG__IMAGE_SAMPLE_CD_CL_O
39331
39332 void
39333 Inst_MIMG__IMAGE_SAMPLE_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
39334 {
39335 panicUnimplemented();
39336 }
39337
39338 Inst_MIMG__IMAGE_SAMPLE_C_CD_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_O(
39339 InFmt_MIMG *iFmt)
39340 : Inst_MIMG(iFmt, "image_sample_c_cd_o")
39341 {
39342 setFlag(GlobalSegment);
39343 } // Inst_MIMG__IMAGE_SAMPLE_C_CD_O
39344
39345 Inst_MIMG__IMAGE_SAMPLE_C_CD_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_O()
39346 {
39347 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_O
39348
39349 void
39350 Inst_MIMG__IMAGE_SAMPLE_C_CD_O::execute(GPUDynInstPtr gpuDynInst)
39351 {
39352 panicUnimplemented();
39353 }
39354
39355 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O(
39356 InFmt_MIMG *iFmt)
39357 : Inst_MIMG(iFmt, "image_sample_c_cd_cl_o")
39358 {
39359 setFlag(GlobalSegment);
39360 } // Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O
39361
39362 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O()
39363 {
39364 } // ~Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O
39365
39366 void
39367 Inst_MIMG__IMAGE_SAMPLE_C_CD_CL_O::execute(GPUDynInstPtr gpuDynInst)
39368 {
39369 panicUnimplemented();
39370 }
39371
39372 Inst_EXP__EXP::Inst_EXP__EXP(InFmt_EXP *iFmt)
39373 : Inst_EXP(iFmt, "exp")
39374 {
39375 } // Inst_EXP__EXP
39376
39377 Inst_EXP__EXP::~Inst_EXP__EXP()
39378 {
39379 } // ~Inst_EXP__EXP
39380
39381 void
39382 Inst_EXP__EXP::execute(GPUDynInstPtr gpuDynInst)
39383 {
39384 panicUnimplemented();
39385 }
39386
39387 Inst_FLAT__FLAT_LOAD_UBYTE::Inst_FLAT__FLAT_LOAD_UBYTE(InFmt_FLAT *iFmt)
39388 : Inst_FLAT(iFmt, "flat_load_ubyte")
39389 {
39390 setFlag(MemoryRef);
39391 setFlag(Load);
39392 } // Inst_FLAT__FLAT_LOAD_UBYTE
39393
39394 Inst_FLAT__FLAT_LOAD_UBYTE::~Inst_FLAT__FLAT_LOAD_UBYTE()
39395 {
39396 } // ~Inst_FLAT__FLAT_LOAD_UBYTE
39397
39398 // Untyped buffer load unsigned byte (zero extend to VGPR destination).
39399 void
39400 Inst_FLAT__FLAT_LOAD_UBYTE::execute(GPUDynInstPtr gpuDynInst)
39401 {
39402 Wavefront *wf = gpuDynInst->wavefront();
39403
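              // If no lanes are active, roll back the issue-time
              // bookkeeping (the instruction was already counted as
              // issued) and skip the memory request entirely.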
39404 if (wf->execMask().none()) {
39405 wf->decVMemInstsIssued();
39406 wf->decLGKMInstsIssued();
39407 wf->rdGmReqsInPipe--;
39408 wf->rdLmReqsInPipe--;
39409 return;
39410 }
39411
39412 gpuDynInst->execUnitId = wf->execUnitId;
39413 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39414 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39415 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39416
39417 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39418
39419 addr.read();
39420
39421 calcAddr(gpuDynInst, addr);
39422
39423 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39424 gpuDynInst->computeUnit()->globalMemoryPipe
39425 .issueRequest(gpuDynInst);
39426 wf->rdGmReqsInPipe--;
39427 wf->outstandingReqsRdGm++;
39428 } else {
39429 fatal("Non global flat instructions not implemented yet.\n");
39430 }
39431
39432 gpuDynInst->wavefront()->outstandingReqs++;
39433 gpuDynInst->wavefront()->validateRequestCounters();
39434 } // execute
39435
39436 void
39437 Inst_FLAT__FLAT_LOAD_UBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
39438 {
39439 initMemRead<VecElemU8>(gpuDynInst);
39440 } // initiateAcc
39441
39442 void
39443 Inst_FLAT__FLAT_LOAD_UBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
39444 {
39445 VecOperandU32 vdst(gpuDynInst, extData.VDST);
39446
39447 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39448 if (gpuDynInst->exec_mask[lane]) {
39449 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU8*>(
39450 gpuDynInst->d_data))[lane]);
39451 }
39452 }
39453 vdst.write();
39454         } // completeAcc
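
              // FLAT loads in this model are split across three hooks:
              // execute() computes per-lane addresses and issues the
              // request to the global memory pipeline, initiateAcc()
              // starts the typed read into gpuDynInst->d_data, and
              // completeAcc() copies the returned bytes into the
              // destination VGPRs, widening as needed. For
              // flat_load_ubyte the widening is a zero extension; a
              // per-lane sketch (illustrative only):
              //
              //     VecElemU8 *bytes =
              //         reinterpret_cast<VecElemU8*>(gpuDynInst->d_data);
              //     vdst[lane] = VecElemU32(bytes[lane]); // zero extend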
39455 // --- Inst_FLAT__FLAT_LOAD_SBYTE class methods ---
39456
39457 Inst_FLAT__FLAT_LOAD_SBYTE::Inst_FLAT__FLAT_LOAD_SBYTE(InFmt_FLAT *iFmt)
39458 : Inst_FLAT(iFmt, "flat_load_sbyte")
39459 {
39460 setFlag(MemoryRef);
39461 setFlag(Load);
39462 } // Inst_FLAT__FLAT_LOAD_SBYTE
39463
39464 Inst_FLAT__FLAT_LOAD_SBYTE::~Inst_FLAT__FLAT_LOAD_SBYTE()
39465 {
39466 } // ~Inst_FLAT__FLAT_LOAD_SBYTE
39467
39468 // Untyped buffer load signed byte (sign extend to VGPR destination).
39469 void
39470 Inst_FLAT__FLAT_LOAD_SBYTE::execute(GPUDynInstPtr gpuDynInst)
39471 {
39472 panicUnimplemented();
39473 }
39474
39475 void
39476 Inst_FLAT__FLAT_LOAD_SBYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
39477 {
39478 } // initiateAcc
39479
39480 void
39481 Inst_FLAT__FLAT_LOAD_SBYTE::completeAcc(GPUDynInstPtr gpuDynInst)
39482 {
39483 }
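
              // flat_load_sbyte is still a stub here; when implemented,
              // the completeAcc widening would presumably be a sign
              // extension, e.g. (a sketch, not this file's code):
              //
              //     vdst[lane] = VecElemU32(int32_t(reinterpret_cast<
              //         int8_t*>(gpuDynInst->d_data)[lane]));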
39484
39485 Inst_FLAT__FLAT_LOAD_USHORT::Inst_FLAT__FLAT_LOAD_USHORT(InFmt_FLAT *iFmt)
39486 : Inst_FLAT(iFmt, "flat_load_ushort")
39487 {
39488 setFlag(MemoryRef);
39489 setFlag(Load);
39490 } // Inst_FLAT__FLAT_LOAD_USHORT
39491
39492 Inst_FLAT__FLAT_LOAD_USHORT::~Inst_FLAT__FLAT_LOAD_USHORT()
39493 {
39494 } // ~Inst_FLAT__FLAT_LOAD_USHORT
39495
39496 // Untyped buffer load unsigned short (zero extend to VGPR destination).
39497 void
39498 Inst_FLAT__FLAT_LOAD_USHORT::execute(GPUDynInstPtr gpuDynInst)
39499 {
39500 Wavefront *wf = gpuDynInst->wavefront();
39501
39502 if (wf->execMask().none()) {
39503 wf->decVMemInstsIssued();
39504 wf->decLGKMInstsIssued();
39505 wf->rdGmReqsInPipe--;
39506 wf->rdLmReqsInPipe--;
39507 return;
39508 }
39509
39510 gpuDynInst->execUnitId = wf->execUnitId;
39511 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39512 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39513 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39514
39515 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39516
39517 addr.read();
39518
39519 calcAddr(gpuDynInst, addr);
39520
39521 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39522 gpuDynInst->computeUnit()->globalMemoryPipe
39523 .issueRequest(gpuDynInst);
39524 wf->rdGmReqsInPipe--;
39525 wf->outstandingReqsRdGm++;
39526 } else {
39527 fatal("Non global flat instructions not implemented yet.\n");
39528 }
39529
39530 gpuDynInst->wavefront()->outstandingReqs++;
39531 gpuDynInst->wavefront()->validateRequestCounters();
39532 }
39533
39534 void
39535 Inst_FLAT__FLAT_LOAD_USHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
39536 {
39537 initMemRead<VecElemU16>(gpuDynInst);
39538 } // initiateAcc
39539
39540 void
39541 Inst_FLAT__FLAT_LOAD_USHORT::completeAcc(GPUDynInstPtr gpuDynInst)
39542 {
39543 VecOperandU32 vdst(gpuDynInst, extData.VDST);
39544
39545 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39546 if (gpuDynInst->exec_mask[lane]) {
39547 vdst[lane] = (VecElemU32)((reinterpret_cast<VecElemU16*>(
39548 gpuDynInst->d_data))[lane]);
39549 }
39550 }
39551 vdst.write();
39552 }
39553
39554
39555 Inst_FLAT__FLAT_LOAD_SSHORT::Inst_FLAT__FLAT_LOAD_SSHORT(InFmt_FLAT *iFmt)
39556 : Inst_FLAT(iFmt, "flat_load_sshort")
39557 {
39558 setFlag(MemoryRef);
39559 setFlag(Load);
39560 } // Inst_FLAT__FLAT_LOAD_SSHORT
39561
39562 Inst_FLAT__FLAT_LOAD_SSHORT::~Inst_FLAT__FLAT_LOAD_SSHORT()
39563 {
39564 } // ~Inst_FLAT__FLAT_LOAD_SSHORT
39565
39566 // Untyped buffer load signed short (sign extend to VGPR destination).
39567 void
39568 Inst_FLAT__FLAT_LOAD_SSHORT::execute(GPUDynInstPtr gpuDynInst)
39569 {
39570 panicUnimplemented();
39571 }
39572
39573 void
39574 Inst_FLAT__FLAT_LOAD_SSHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
39575 {
39576 } // initiateAcc
39577
39578 void
39579 Inst_FLAT__FLAT_LOAD_SSHORT::completeAcc(GPUDynInstPtr gpuDynInst)
39580 {
39581 }
39582
39583 Inst_FLAT__FLAT_LOAD_DWORD::Inst_FLAT__FLAT_LOAD_DWORD(InFmt_FLAT *iFmt)
39584 : Inst_FLAT(iFmt, "flat_load_dword")
39585 {
39586 setFlag(MemoryRef);
39587 setFlag(Load);
39588 } // Inst_FLAT__FLAT_LOAD_DWORD
39589
39590 Inst_FLAT__FLAT_LOAD_DWORD::~Inst_FLAT__FLAT_LOAD_DWORD()
39591 {
39592 } // ~Inst_FLAT__FLAT_LOAD_DWORD
39593
39594 // Untyped buffer load dword.
39595 void
39596 Inst_FLAT__FLAT_LOAD_DWORD::execute(GPUDynInstPtr gpuDynInst)
39597 {
39598 Wavefront *wf = gpuDynInst->wavefront();
39599
39600 if (wf->execMask().none()) {
39601 wf->decVMemInstsIssued();
39602 wf->decLGKMInstsIssued();
39603 wf->rdGmReqsInPipe--;
39604 wf->rdLmReqsInPipe--;
39605 return;
39606 }
39607
39608 gpuDynInst->execUnitId = wf->execUnitId;
39609 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39610 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39611 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39612
39613 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39614
39615 addr.read();
39616
39617 calcAddr(gpuDynInst, addr);
39618
39619 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39620 gpuDynInst->computeUnit()->globalMemoryPipe
39621 .issueRequest(gpuDynInst);
39622 wf->rdGmReqsInPipe--;
39623 wf->outstandingReqsRdGm++;
39624 } else {
39625 fatal("Non global flat instructions not implemented yet.\n");
39626 }
39627
39628 gpuDynInst->wavefront()->outstandingReqs++;
39629 gpuDynInst->wavefront()->validateRequestCounters();
39630 }
39631
39632 void
39633 Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
39634 {
39635 initMemRead<VecElemU32>(gpuDynInst);
39636 } // initiateAcc
39637
39638 void
39639 Inst_FLAT__FLAT_LOAD_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
39640 {
39641 VecOperandU32 vdst(gpuDynInst, extData.VDST);
39642
39643 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39644 if (gpuDynInst->exec_mask[lane]) {
39645 vdst[lane] = (reinterpret_cast<VecElemU32*>(
39646 gpuDynInst->d_data))[lane];
39647 }
39648 }
39649 vdst.write();
39650 } // completeAcc
39651
39652 Inst_FLAT__FLAT_LOAD_DWORDX2::Inst_FLAT__FLAT_LOAD_DWORDX2(
39653 InFmt_FLAT *iFmt)
39654 : Inst_FLAT(iFmt, "flat_load_dwordx2")
39655 {
39656 setFlag(MemoryRef);
39657 setFlag(Load);
39658 } // Inst_FLAT__FLAT_LOAD_DWORDX2
39659
39660 Inst_FLAT__FLAT_LOAD_DWORDX2::~Inst_FLAT__FLAT_LOAD_DWORDX2()
39661 {
39662 } // ~Inst_FLAT__FLAT_LOAD_DWORDX2
39663
39664 // Untyped buffer load 2 dwords.
39665 void
39666 Inst_FLAT__FLAT_LOAD_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
39667 {
39668 Wavefront *wf = gpuDynInst->wavefront();
39669
39670 if (wf->execMask().none()) {
39671 wf->decVMemInstsIssued();
39672 wf->decLGKMInstsIssued();
39673 wf->rdGmReqsInPipe--;
39674 wf->rdLmReqsInPipe--;
39675 return;
39676 }
39677
39678 gpuDynInst->execUnitId = wf->execUnitId;
39679 gpuDynInst->exec_mask = gpuDynInst->wavefront()->execMask();
39680 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39681 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39682
39683 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39684
39685 addr.read();
39686
39687 calcAddr(gpuDynInst, addr);
39688
39689 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39690 gpuDynInst->computeUnit()->globalMemoryPipe
39691 .issueRequest(gpuDynInst);
39692 wf->rdGmReqsInPipe--;
39693 wf->outstandingReqsRdGm++;
39694 } else {
39695 fatal("Non global flat instructions not implemented yet.\n");
39696 }
39697
39698 gpuDynInst->wavefront()->outstandingReqs++;
39699 gpuDynInst->wavefront()->validateRequestCounters();
39700 }
39701
39702 void
39703 Inst_FLAT__FLAT_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
39704 {
39705 initMemRead<VecElemU64>(gpuDynInst);
39706 } // initiateAcc
39707
39708 void
39709 Inst_FLAT__FLAT_LOAD_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
39710 {
39711 VecOperandU64 vdst(gpuDynInst, extData.VDST);
39712
39713 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39714 if (gpuDynInst->exec_mask[lane]) {
39715 vdst[lane] = (reinterpret_cast<VecElemU64*>(
39716 gpuDynInst->d_data))[lane];
39717 }
39718 }
39719 vdst.write();
39720 } // completeAcc
39721
39722 Inst_FLAT__FLAT_LOAD_DWORDX3::Inst_FLAT__FLAT_LOAD_DWORDX3(
39723 InFmt_FLAT *iFmt)
39724 : Inst_FLAT(iFmt, "flat_load_dwordx3")
39725 {
39726 setFlag(MemoryRef);
39727 setFlag(Load);
39728 } // Inst_FLAT__FLAT_LOAD_DWORDX3
39729
39730 Inst_FLAT__FLAT_LOAD_DWORDX3::~Inst_FLAT__FLAT_LOAD_DWORDX3()
39731 {
39732 } // ~Inst_FLAT__FLAT_LOAD_DWORDX3
39733
39734 // Untyped buffer load 3 dwords.
39735 void
39736 Inst_FLAT__FLAT_LOAD_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
39737 {
39738 Wavefront *wf = gpuDynInst->wavefront();
39739
39740 if (wf->execMask().none()) {
39741 wf->decVMemInstsIssued();
39742 wf->decLGKMInstsIssued();
39743 wf->rdGmReqsInPipe--;
39744 wf->rdLmReqsInPipe--;
39745 return;
39746 }
39747
39748 gpuDynInst->execUnitId = wf->execUnitId;
39749 gpuDynInst->exec_mask = wf->execMask();
39750 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39751 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39752
39753 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39754
39755 addr.read();
39756
39757 calcAddr(gpuDynInst, addr);
39758
39759 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39760 gpuDynInst->computeUnit()->globalMemoryPipe
39761 .issueRequest(gpuDynInst);
39762 wf->rdGmReqsInPipe--;
39763 wf->outstandingReqsRdGm++;
39764 } else {
39765 fatal("Non global flat instructions not implemented yet.\n");
39766 }
39767
39768 gpuDynInst->wavefront()->outstandingReqs++;
39769 gpuDynInst->wavefront()->validateRequestCounters();
39770 }
39771
39772 void
39773 Inst_FLAT__FLAT_LOAD_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
39774 {
39775 initMemRead<3>(gpuDynInst);
39776 } // initiateAcc
39777
39778 void
39779 Inst_FLAT__FLAT_LOAD_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
39780 {
39781 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
39782 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
39783 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
39784
39785 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39786 if (gpuDynInst->exec_mask[lane]) {
39787 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
39788 gpuDynInst->d_data))[lane * 3];
39789 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
39790 gpuDynInst->d_data))[lane * 3 + 1];
39791 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
39792 gpuDynInst->d_data))[lane * 3 + 2];
39793 }
39794 }
39795
39796 vdst0.write();
39797 vdst1.write();
39798 vdst2.write();
39799 } // completeAcc
39800
39801 Inst_FLAT__FLAT_LOAD_DWORDX4::Inst_FLAT__FLAT_LOAD_DWORDX4(
39802 InFmt_FLAT *iFmt)
39803 : Inst_FLAT(iFmt, "flat_load_dwordx4")
39804 {
39805 setFlag(MemoryRef);
39806 setFlag(Load);
39807 } // Inst_FLAT__FLAT_LOAD_DWORDX4
39808
39809 Inst_FLAT__FLAT_LOAD_DWORDX4::~Inst_FLAT__FLAT_LOAD_DWORDX4()
39810 {
39811 } // ~Inst_FLAT__FLAT_LOAD_DWORDX4
39812
39813 // Untyped buffer load 4 dwords.
39814 void
39815 Inst_FLAT__FLAT_LOAD_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
39816 {
39817 Wavefront *wf = gpuDynInst->wavefront();
39818
39819 if (wf->execMask().none()) {
39820 wf->decVMemInstsIssued();
39821 wf->decLGKMInstsIssued();
39822 wf->rdGmReqsInPipe--;
39823 wf->rdLmReqsInPipe--;
                  return;
39824         }
39825
39826 gpuDynInst->execUnitId = wf->execUnitId;
39827 gpuDynInst->exec_mask = wf->execMask();
39828 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39829 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39830
39831 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39832
39833 addr.read();
39834
39835 calcAddr(gpuDynInst, addr);
39836
39837 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39838 gpuDynInst->computeUnit()->globalMemoryPipe
39839 .issueRequest(gpuDynInst);
39840 wf->rdGmReqsInPipe--;
39841 wf->outstandingReqsRdGm++;
39842 } else {
39843 fatal("Non global flat instructions not implemented yet.\n");
39844 }
39845
39846 gpuDynInst->wavefront()->outstandingReqs++;
39847 gpuDynInst->wavefront()->validateRequestCounters();
39848 }
39849
39850 void
39851 Inst_FLAT__FLAT_LOAD_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
39852 {
39853 initMemRead<4>(gpuDynInst);
39854 } // initiateAcc
39855
39856 void
39857 Inst_FLAT__FLAT_LOAD_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
39858 {
39859 VecOperandU32 vdst0(gpuDynInst, extData.VDST);
39860 VecOperandU32 vdst1(gpuDynInst, extData.VDST + 1);
39861 VecOperandU32 vdst2(gpuDynInst, extData.VDST + 2);
39862 VecOperandU32 vdst3(gpuDynInst, extData.VDST + 3);
39863
39864 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39865 if (gpuDynInst->exec_mask[lane]) {
39866 vdst0[lane] = (reinterpret_cast<VecElemU32*>(
39867 gpuDynInst->d_data))[lane * 4];
39868 vdst1[lane] = (reinterpret_cast<VecElemU32*>(
39869 gpuDynInst->d_data))[lane * 4 + 1];
39870 vdst2[lane] = (reinterpret_cast<VecElemU32*>(
39871 gpuDynInst->d_data))[lane * 4 + 2];
39872 vdst3[lane] = (reinterpret_cast<VecElemU32*>(
39873 gpuDynInst->d_data))[lane * 4 + 3];
39874 }
39875 }
39876
39877 vdst0.write();
39878 vdst1.write();
39879 vdst2.write();
39880 vdst3.write();
39881 } // completeAcc
39882
39883 Inst_FLAT__FLAT_STORE_BYTE::Inst_FLAT__FLAT_STORE_BYTE(InFmt_FLAT *iFmt)
39884 : Inst_FLAT(iFmt, "flat_store_byte")
39885 {
39886 setFlag(MemoryRef);
39887 setFlag(Store);
39888 } // Inst_FLAT__FLAT_STORE_BYTE
39889
39890 Inst_FLAT__FLAT_STORE_BYTE::~Inst_FLAT__FLAT_STORE_BYTE()
39891 {
39892 } // ~Inst_FLAT__FLAT_STORE_BYTE
39893
39894 // Untyped buffer store byte.
39895 void
39896 Inst_FLAT__FLAT_STORE_BYTE::execute(GPUDynInstPtr gpuDynInst)
39897 {
39898 Wavefront *wf = gpuDynInst->wavefront();
39899
39900 if (wf->execMask().none()) {
39901 wf->decVMemInstsIssued();
39902 wf->decLGKMInstsIssued();
39903 wf->wrGmReqsInPipe--;
39904 wf->wrLmReqsInPipe--;
39905 return;
39906 }
39907
39908 gpuDynInst->execUnitId = wf->execUnitId;
39909 gpuDynInst->exec_mask = wf->execMask();
39910 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39911 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39912
39913 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39914
39915 addr.read();
39916
39917 calcAddr(gpuDynInst, addr);
39918
39919 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39920 gpuDynInst->computeUnit()->globalMemoryPipe
39921 .issueRequest(gpuDynInst);
39922 wf->wrGmReqsInPipe--;
39923 wf->outstandingReqsWrGm++;
39924 } else {
39925 fatal("Non global flat instructions not implemented yet.\n");
39926 }
39927
39928 gpuDynInst->wavefront()->outstandingReqs++;
39929 gpuDynInst->wavefront()->validateRequestCounters();
39930 } // execute
39931
39932 void
39933 Inst_FLAT__FLAT_STORE_BYTE::initiateAcc(GPUDynInstPtr gpuDynInst)
39934 {
39935 ConstVecOperandU8 data(gpuDynInst, extData.DATA);
39936 data.read();
39937
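              // Stage the low byte of each active lane's DATA register in
              // d_data; initMemWrite<VecElemU8>() below then issues the
              // per-lane byte stores from that buffer.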
39938 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
39939 if (gpuDynInst->exec_mask[lane]) {
39940 (reinterpret_cast<VecElemU8*>(gpuDynInst->d_data))[lane]
39941 = data[lane];
39942 }
39943 }
39944
39945 initMemWrite<VecElemU8>(gpuDynInst);
39946 } // initiateAcc
39947
39948 void
39949 Inst_FLAT__FLAT_STORE_BYTE::completeAcc(GPUDynInstPtr gpuDynInst)
39950 {
39951 }
39952
39953 Inst_FLAT__FLAT_STORE_SHORT::Inst_FLAT__FLAT_STORE_SHORT(InFmt_FLAT *iFmt)
39954 : Inst_FLAT(iFmt, "flat_store_short")
39955 {
39956 setFlag(MemoryRef);
39957 setFlag(Store);
39958 } // Inst_FLAT__FLAT_STORE_SHORT
39959
39960 Inst_FLAT__FLAT_STORE_SHORT::~Inst_FLAT__FLAT_STORE_SHORT()
39961 {
39962 } // ~Inst_FLAT__FLAT_STORE_SHORT
39963
39964 // Untyped buffer store short.
39965 void
39966 Inst_FLAT__FLAT_STORE_SHORT::execute(GPUDynInstPtr gpuDynInst)
39967 {
39968 Wavefront *wf = gpuDynInst->wavefront();
39969
39970 if (wf->execMask().none()) {
39971 wf->decVMemInstsIssued();
39972 wf->decLGKMInstsIssued();
39973 wf->wrGmReqsInPipe--;
39974 wf->wrLmReqsInPipe--;
39975 return;
39976 }
39977
39978 gpuDynInst->execUnitId = wf->execUnitId;
39979 gpuDynInst->exec_mask = wf->execMask();
39980 gpuDynInst->latency.init(gpuDynInst->computeUnit());
39981 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
39982
39983 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
39984
39985 addr.read();
39986
39987 calcAddr(gpuDynInst, addr);
39988
39989 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
39990 gpuDynInst->computeUnit()->globalMemoryPipe
39991 .issueRequest(gpuDynInst);
39992 wf->wrGmReqsInPipe--;
39993 wf->outstandingReqsWrGm++;
39994 } else {
39995 fatal("Non global flat instructions not implemented yet.\n");
39996 }
39997
39998 gpuDynInst->wavefront()->outstandingReqs++;
39999 gpuDynInst->wavefront()->validateRequestCounters();
40000 }
40001
40002 void
40003 Inst_FLAT__FLAT_STORE_SHORT::initiateAcc(GPUDynInstPtr gpuDynInst)
40004 {
40005 ConstVecOperandU16 data(gpuDynInst, extData.DATA);
40006
40007 data.read();
40008
40009 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40010 if (gpuDynInst->exec_mask[lane]) {
40011 (reinterpret_cast<VecElemU16*>(gpuDynInst->d_data))[lane]
40012 = data[lane];
40013 }
40014 }
40015
40016 initMemWrite<VecElemU16>(gpuDynInst);
40017 } // initiateAcc
40018
40019 void
40020 Inst_FLAT__FLAT_STORE_SHORT::completeAcc(GPUDynInstPtr gpuDynInst)
40021 {
40022 } // completeAcc
40023
40024 Inst_FLAT__FLAT_STORE_DWORD::Inst_FLAT__FLAT_STORE_DWORD(InFmt_FLAT *iFmt)
40025 : Inst_FLAT(iFmt, "flat_store_dword")
40026 {
40027 setFlag(MemoryRef);
40028 setFlag(Store);
40029 } // Inst_FLAT__FLAT_STORE_DWORD
40030
40031 Inst_FLAT__FLAT_STORE_DWORD::~Inst_FLAT__FLAT_STORE_DWORD()
40032 {
40033 } // ~Inst_FLAT__FLAT_STORE_DWORD
40034
40035 // Untyped buffer store dword.
40036 void
40037 Inst_FLAT__FLAT_STORE_DWORD::execute(GPUDynInstPtr gpuDynInst)
40038 {
40039 Wavefront *wf = gpuDynInst->wavefront();
40040
40041 if (wf->execMask().none()) {
40042 wf->decVMemInstsIssued();
40043 wf->decLGKMInstsIssued();
40044 wf->wrGmReqsInPipe--;
40045 wf->wrLmReqsInPipe--;
40046 return;
40047 }
40048
40049 gpuDynInst->execUnitId = wf->execUnitId;
40050 gpuDynInst->exec_mask = wf->execMask();
40051 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40052 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40053
40054 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40055
40056 addr.read();
40057
40058 calcAddr(gpuDynInst, addr);
40059
40060 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40061 gpuDynInst->computeUnit()->globalMemoryPipe
40062 .issueRequest(gpuDynInst);
40063 wf->wrGmReqsInPipe--;
40064 wf->outstandingReqsWrGm++;
40065 } else {
40066 fatal("Non global flat instructions not implemented yet.\n");
40067 }
40068
40069 gpuDynInst->wavefront()->outstandingReqs++;
40070 gpuDynInst->wavefront()->validateRequestCounters();
40071 }
40072
40073 void
40074 Inst_FLAT__FLAT_STORE_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
40075 {
40076 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40077 data.read();
40078
40079 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40080 if (gpuDynInst->exec_mask[lane]) {
40081 (reinterpret_cast<VecElemU32*>(gpuDynInst->d_data))[lane]
40082 = data[lane];
40083 }
40084 }
40085
40086 initMemWrite<VecElemU32>(gpuDynInst);
40087 } // initiateAcc
40088
40089 void
40090 Inst_FLAT__FLAT_STORE_DWORD::completeAcc(GPUDynInstPtr gpuDynInst)
40091 {
40092 } // completeAcc
40093
40094 Inst_FLAT__FLAT_STORE_DWORDX2::Inst_FLAT__FLAT_STORE_DWORDX2(
40095 InFmt_FLAT *iFmt)
40096 : Inst_FLAT(iFmt, "flat_store_dwordx2")
40097 {
40098 setFlag(MemoryRef);
40099 setFlag(Store);
40100 } // Inst_FLAT__FLAT_STORE_DWORDX2
40101
40102 Inst_FLAT__FLAT_STORE_DWORDX2::~Inst_FLAT__FLAT_STORE_DWORDX2()
40103 {
40104 } // ~Inst_FLAT__FLAT_STORE_DWORDX2
40105
40106 // Untyped buffer store 2 dwords.
40107 void
40108 Inst_FLAT__FLAT_STORE_DWORDX2::execute(GPUDynInstPtr gpuDynInst)
40109 {
40110 Wavefront *wf = gpuDynInst->wavefront();
40111
40112 if (wf->execMask().none()) {
40113 wf->decVMemInstsIssued();
40114 wf->decLGKMInstsIssued();
40115 wf->wrGmReqsInPipe--;
40116 wf->wrLmReqsInPipe--;
40117 return;
40118 }
40119
40120 gpuDynInst->execUnitId = wf->execUnitId;
40121 gpuDynInst->exec_mask = wf->execMask();
40122 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40123 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40124
40125 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40126
40127 addr.read();
40128
40129 calcAddr(gpuDynInst, addr);
40130
40131 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40132 gpuDynInst->computeUnit()->globalMemoryPipe
40133 .issueRequest(gpuDynInst);
40134 wf->wrGmReqsInPipe--;
40135 wf->outstandingReqsWrGm++;
40136 } else {
40137 fatal("Non global flat instructions not implemented yet.\n");
40138 }
40139
40140 wf->outstandingReqs++;
40141 wf->validateRequestCounters();
40142 }
40143
40144 void
40145 Inst_FLAT__FLAT_STORE_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
40146 {
40147 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
40148 data.read();
40149
40150 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40151 if (gpuDynInst->exec_mask[lane]) {
40152 (reinterpret_cast<VecElemU64*>(gpuDynInst->d_data))[lane]
40153 = data[lane];
40154 }
40155 }
40156
40157 initMemWrite<VecElemU64>(gpuDynInst);
40158 } // initiateAcc
40159
40160 void
40161 Inst_FLAT__FLAT_STORE_DWORDX2::completeAcc(GPUDynInstPtr gpuDynInst)
40162 {
40163 } // completeAcc
40164
40165 Inst_FLAT__FLAT_STORE_DWORDX3::Inst_FLAT__FLAT_STORE_DWORDX3(
40166 InFmt_FLAT *iFmt)
40167 : Inst_FLAT(iFmt, "flat_store_dwordx3")
40168 {
40169 setFlag(MemoryRef);
40170 setFlag(Store);
40171 } // Inst_FLAT__FLAT_STORE_DWORDX3
40172
40173 Inst_FLAT__FLAT_STORE_DWORDX3::~Inst_FLAT__FLAT_STORE_DWORDX3()
40174 {
40175 } // ~Inst_FLAT__FLAT_STORE_DWORDX3
40176
40177 // Untyped buffer store 3 dwords.
40178 void
40179 Inst_FLAT__FLAT_STORE_DWORDX3::execute(GPUDynInstPtr gpuDynInst)
40180 {
40181 Wavefront *wf = gpuDynInst->wavefront();
40182
40183 if (wf->execMask().none()) {
40184 wf->decVMemInstsIssued();
40185 wf->decLGKMInstsIssued();
40186 wf->wrGmReqsInPipe--;
40187 wf->wrLmReqsInPipe--;
40188 return;
40189 }
40190
40191 gpuDynInst->execUnitId = wf->execUnitId;
40192 gpuDynInst->exec_mask = wf->execMask();
40193 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40194 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40195
40196 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40197
40198 addr.read();
40199
40200 calcAddr(gpuDynInst, addr);
40201
40202 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40203 gpuDynInst->computeUnit()->globalMemoryPipe
40204 .issueRequest(gpuDynInst);
40205 wf->wrGmReqsInPipe--;
40206 wf->outstandingReqsWrGm++;
40207 } else {
40208 fatal("Non global flat instructions not implemented yet.\n");
40209 }
40210
40211 gpuDynInst->wavefront()->outstandingReqs++;
40212 gpuDynInst->wavefront()->validateRequestCounters();
40213 }
40214
40215 void
40216 Inst_FLAT__FLAT_STORE_DWORDX3::initiateAcc(GPUDynInstPtr gpuDynInst)
40217 {
40218 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
40219 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
40220 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
40221
40222 data0.read();
40223 data1.read();
40224 data2.read();
40225
40226 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40227 if (gpuDynInst->exec_mask[lane]) {
40228 (reinterpret_cast<VecElemU32*>(
40229 gpuDynInst->d_data))[lane * 3] = data0[lane];
40230 (reinterpret_cast<VecElemU32*>(
40231 gpuDynInst->d_data))[lane * 3 + 1] = data1[lane];
40232 (reinterpret_cast<VecElemU32*>(
40233 gpuDynInst->d_data))[lane * 3 + 2] = data2[lane];
40234 }
40235 }
40236
40237 initMemWrite<3>(gpuDynInst);
40238 } // initiateAcc
40239
40240 void
40241 Inst_FLAT__FLAT_STORE_DWORDX3::completeAcc(GPUDynInstPtr gpuDynInst)
40242 {
40243 } // completeAcc
40244
40245 Inst_FLAT__FLAT_STORE_DWORDX4::Inst_FLAT__FLAT_STORE_DWORDX4(
40246 InFmt_FLAT *iFmt)
40247 : Inst_FLAT(iFmt, "flat_store_dwordx4")
40248 {
40249 setFlag(MemoryRef);
40250 setFlag(Store);
40251 } // Inst_FLAT__FLAT_STORE_DWORDX4
40252
40253 Inst_FLAT__FLAT_STORE_DWORDX4::~Inst_FLAT__FLAT_STORE_DWORDX4()
40254 {
40255 } // ~Inst_FLAT__FLAT_STORE_DWORDX4
40256
40257 // Untyped buffer store 4 dwords.
40258 void
40259 Inst_FLAT__FLAT_STORE_DWORDX4::execute(GPUDynInstPtr gpuDynInst)
40260 {
40261 Wavefront *wf = gpuDynInst->wavefront();
40262
40263 if (wf->execMask().none()) {
40264 wf->decVMemInstsIssued();
40265 wf->decLGKMInstsIssued();
40266 wf->wrGmReqsInPipe--;
40267 wf->wrLmReqsInPipe--;
40268 return;
40269 }
40270
40271 gpuDynInst->execUnitId = wf->execUnitId;
40272 gpuDynInst->exec_mask = wf->execMask();
40273 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40274 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40275
40276 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40277
40278 addr.read();
40279
40280 calcAddr(gpuDynInst, addr);
40281
40282 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40283 gpuDynInst->computeUnit()->globalMemoryPipe
40284 .issueRequest(gpuDynInst);
40285 wf->wrGmReqsInPipe--;
40286 wf->outstandingReqsWrGm++;
40287 } else {
40288 fatal("Non global flat instructions not implemented yet.\n");
40289 }
40290
40291 gpuDynInst->wavefront()->outstandingReqs++;
40292 gpuDynInst->wavefront()->validateRequestCounters();
40293 }
40294
40295 void
40296 Inst_FLAT__FLAT_STORE_DWORDX4::initiateAcc(GPUDynInstPtr gpuDynInst)
40297 {
40298 ConstVecOperandU32 data0(gpuDynInst, extData.DATA);
40299 ConstVecOperandU32 data1(gpuDynInst, extData.DATA + 1);
40300 ConstVecOperandU32 data2(gpuDynInst, extData.DATA + 2);
40301 ConstVecOperandU32 data3(gpuDynInst, extData.DATA + 3);
40302
40303 data0.read();
40304 data1.read();
40305 data2.read();
40306 data3.read();
40307
40308 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40309 if (gpuDynInst->exec_mask[lane]) {
40310 (reinterpret_cast<VecElemU32*>(
40311 gpuDynInst->d_data))[lane * 4] = data0[lane];
40312 (reinterpret_cast<VecElemU32*>(
40313 gpuDynInst->d_data))[lane * 4 + 1] = data1[lane];
40314 (reinterpret_cast<VecElemU32*>(
40315 gpuDynInst->d_data))[lane * 4 + 2] = data2[lane];
40316 (reinterpret_cast<VecElemU32*>(
40317 gpuDynInst->d_data))[lane * 4 + 3] = data3[lane];
40318 }
40319 }
40320
40321 initMemWrite<4>(gpuDynInst);
40322 } // initiateAcc
40323
40324 void
40325 Inst_FLAT__FLAT_STORE_DWORDX4::completeAcc(GPUDynInstPtr gpuDynInst)
40326 {
40327 } // completeAcc
40328
40329 Inst_FLAT__FLAT_ATOMIC_SWAP::Inst_FLAT__FLAT_ATOMIC_SWAP(InFmt_FLAT *iFmt)
40330 : Inst_FLAT(iFmt, "flat_atomic_swap")
40331 {
40332 setFlag(AtomicExch);
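                  // The GLC bit selects the return behavior: glc=1 makes
                  // this a returning atomic whose pre-op memory value is
                  // written to VDST in completeAcc(), glc=0 discards the
                  // old value. E.g.,
                  //   flat_atomic_swap v0, v[2:3], v4 glc
                  // returns the previous value of MEM[v[2:3]] in v0.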
40333 if (instData.GLC) {
40334 setFlag(AtomicReturn);
40335 } else {
40336 setFlag(AtomicNoReturn);
40337 } // if
40338 setFlag(MemoryRef);
40339 } // Inst_FLAT__FLAT_ATOMIC_SWAP
40340
40341 Inst_FLAT__FLAT_ATOMIC_SWAP::~Inst_FLAT__FLAT_ATOMIC_SWAP()
40342 {
40343 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP
40344
40345 // tmp = MEM[ADDR];
40346 // MEM[ADDR] = DATA;
40347 // RETURN_DATA = tmp.
40348 void
40349 Inst_FLAT__FLAT_ATOMIC_SWAP::execute(GPUDynInstPtr gpuDynInst)
40350 {
40351 Wavefront *wf = gpuDynInst->wavefront();
40352
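              // An atomic is a read-modify-write, so it was charged against
              // both the read and write global-memory counters at issue
              // time; undo both when every lane is inactive.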
40353 if (wf->execMask().none()) {
40354 wf->decVMemInstsIssued();
40355 wf->decLGKMInstsIssued();
40356 wf->wrGmReqsInPipe--;
40357 wf->rdGmReqsInPipe--;
40358 return;
40359 }
40360
40361 gpuDynInst->execUnitId = wf->execUnitId;
40362 gpuDynInst->exec_mask = wf->execMask();
40363 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40364 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40365
40366 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40367
40368 addr.read();
40369
40370 calcAddr(gpuDynInst, addr);
40371
40372 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
40373 gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
40374 // TODO: additional address computation required for scratch
40375 panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
40376 "Flats to private aperture not tested yet\n");
40377 gpuDynInst->computeUnit()->globalMemoryPipe.
40378 issueRequest(gpuDynInst);
40379 wf->wrGmReqsInPipe--;
40380 wf->outstandingReqsWrGm++;
40381 wf->rdGmReqsInPipe--;
40382 wf->outstandingReqsRdGm++;
40383 } else {
40384 fatal("Non global flat instructions not implemented yet.\n");
40385 }
40386
40387 gpuDynInst->wavefront()->outstandingReqs++;
40388 gpuDynInst->wavefront()->validateRequestCounters();
40389
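              // Stage each active lane's swap value in a_data. The request
              // issued above is only queued at this point, so the staging
              // still precedes the actual atomic access.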
40390 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40391
40392 data.read();
40393
40394 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40395 if (gpuDynInst->exec_mask[lane]) {
40396 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40397 = data[lane];
40398 }
40399 }
40401 } // execute
40402
40403 void
40404 Inst_FLAT__FLAT_ATOMIC_SWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
40405 {
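              // Issue one 32-bit atomic per active lane; the memory system
              // applies the operation selected by the atomic flags set in
              // the constructor (AtomicExch here) to the value staged in
              // a_data.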
40406 initAtomicAccess<VecElemU32>(gpuDynInst);
40407 } // initiateAcc
40408
40409 void
40410 Inst_FLAT__FLAT_ATOMIC_SWAP::completeAcc(GPUDynInstPtr gpuDynInst)
40411 {
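              // Only returning atomics (glc=1) write anything back: the
              // pre-op memory value arrives in d_data and is committed to
              // VDST.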
40412 if (isAtomicRet()) {
40413 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40414
40415 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40416 if (gpuDynInst->exec_mask[lane]) {
40417 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40418 gpuDynInst->d_data))[lane];
40419 }
40420 }
40421
40422 vdst.write();
40423 }
40424 } // completeAcc
40425
40426 // --- Inst_FLAT__FLAT_ATOMIC_CMPSWAP class methods ---
40427
40428 Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40429 ::Inst_FLAT__FLAT_ATOMIC_CMPSWAP(InFmt_FLAT *iFmt)
40430 : Inst_FLAT(iFmt, "flat_atomic_cmpswap")
40431 {
40432 setFlag(AtomicCAS);
40433 if (instData.GLC) {
40434 setFlag(AtomicReturn);
40435 } else {
40436 setFlag(AtomicNoReturn);
40437 } // if
40438 setFlag(MemoryRef);
40439 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40440
40441 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP()
40442 {
40443 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP
40444
40445 // tmp = MEM[ADDR];
40446 // src = DATA[0];
40447 // cmp = DATA[1];
40448 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
40449 // RETURN_DATA[0] = tmp.
40450 void
40451 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::execute(GPUDynInstPtr gpuDynInst)
40452 {
40453 Wavefront *wf = gpuDynInst->wavefront();
40454
40455 if (wf->execMask().none()) {
40456 wf->decVMemInstsIssued();
40457 wf->decLGKMInstsIssued();
40458 wf->wrGmReqsInPipe--;
40459 wf->rdGmReqsInPipe--;
40460 return;
40461 }
40462
40463 gpuDynInst->execUnitId = wf->execUnitId;
40464 gpuDynInst->exec_mask = wf->execMask();
40465 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40466 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40467
40468 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40469 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40470 ConstVecOperandU32 cmp(gpuDynInst, extData.DATA + 1);
40471
40472 addr.read();
40473 data.read();
40474 cmp.read();
40475
40476 calcAddr(gpuDynInst, addr);
40477
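              // Pack the CAS operands: x_data holds each lane's swap value
              // (DATA[0]) and a_data its compare value (DATA[1]). Per lane,
              // the memory system then performs
              //   mem = (mem == a_data) ? x_data : mem;
              // and returns the original value of mem.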
40478 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40479 if (gpuDynInst->exec_mask[lane]) {
40480 (reinterpret_cast<VecElemU32*>(gpuDynInst->x_data))[lane]
40481 = data[lane];
40482 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40483 = cmp[lane];
40484 }
40485 }
40486
40487 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
40488 gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
40489             /**
40490              * TODO: If you encounter this panic, simply remove it and
40491              * restart the simulation. It should work fine; the panic
40492              * only warns the user that this path has never been
40493              * tested, although all the necessary logic is implemented.
40494              */
40495 panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
40496 "Flats to private aperture not tested yet\n");
40497 gpuDynInst->computeUnit()->globalMemoryPipe.
40498 issueRequest(gpuDynInst);
40499 wf->wrGmReqsInPipe--;
40500 wf->outstandingReqsWrGm++;
40501 wf->rdGmReqsInPipe--;
40502 wf->outstandingReqsRdGm++;
40503 } else {
40504 fatal("Non global flat instructions not implemented yet.\n");
40505 }
40506
40507 gpuDynInst->wavefront()->outstandingReqs++;
40508 gpuDynInst->wavefront()->validateRequestCounters();
40509 }
40510
40511 void
40512 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::initiateAcc(GPUDynInstPtr gpuDynInst)
40513 {
40514 initAtomicAccess<VecElemU32>(gpuDynInst);
40515 } // initiateAcc
40516
40517 void
40518 Inst_FLAT__FLAT_ATOMIC_CMPSWAP::completeAcc(GPUDynInstPtr gpuDynInst)
40519 {
40520 if (isAtomicRet()) {
40521 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40522
40523 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40524 if (gpuDynInst->exec_mask[lane]) {
40525 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40526 gpuDynInst->d_data))[lane];
40527 }
40528 }
40529
40530 vdst.write();
40531 }
40532 } // completeAcc
40533
40534 Inst_FLAT__FLAT_ATOMIC_ADD::Inst_FLAT__FLAT_ATOMIC_ADD(InFmt_FLAT *iFmt)
40535 : Inst_FLAT(iFmt, "flat_atomic_add")
40536 {
40537 setFlag(AtomicAdd);
40538 if (instData.GLC) {
40539 setFlag(AtomicReturn);
40540 } else {
40541 setFlag(AtomicNoReturn);
40542 } // if
40543 setFlag(MemoryRef);
40544 } // Inst_FLAT__FLAT_ATOMIC_ADD
40545
40546 Inst_FLAT__FLAT_ATOMIC_ADD::~Inst_FLAT__FLAT_ATOMIC_ADD()
40547 {
40548 } // ~Inst_FLAT__FLAT_ATOMIC_ADD
40549
40550 // tmp = MEM[ADDR];
40551 // MEM[ADDR] += DATA;
40552 // RETURN_DATA = tmp.
40553 void
40554 Inst_FLAT__FLAT_ATOMIC_ADD::execute(GPUDynInstPtr gpuDynInst)
40555 {
40556 Wavefront *wf = gpuDynInst->wavefront();
40557
40558 if (wf->execMask().none()) {
40559 wf->decVMemInstsIssued();
40560 wf->decLGKMInstsIssued();
40561 wf->wrGmReqsInPipe--;
40562 wf->rdGmReqsInPipe--;
40563 return;
40564 }
40565
40566 gpuDynInst->execUnitId = wf->execUnitId;
40567 gpuDynInst->exec_mask = wf->execMask();
40568 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40569 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40570
40571 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40572 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40573
40574 addr.read();
40575 data.read();
40576
40577 calcAddr(gpuDynInst, addr);
40578
40579 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40580 if (gpuDynInst->exec_mask[lane]) {
40581 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40582 = data[lane];
40583 }
40584 }
40585
40586 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40587 gpuDynInst->computeUnit()->globalMemoryPipe.
40588 issueRequest(gpuDynInst);
40589 wf->wrGmReqsInPipe--;
40590 wf->outstandingReqsWrGm++;
40591 wf->rdGmReqsInPipe--;
40592 wf->outstandingReqsRdGm++;
40593 } else {
40594 fatal("Non global flat instructions not implemented yet.\n");
40595 }
40596
40597 gpuDynInst->wavefront()->outstandingReqs++;
40598 gpuDynInst->wavefront()->validateRequestCounters();
40599 }
40600
40601 void
40602 Inst_FLAT__FLAT_ATOMIC_ADD::initiateAcc(GPUDynInstPtr gpuDynInst)
40603 {
40604 initAtomicAccess<VecElemU32>(gpuDynInst);
40605 } // initiateAcc
40606
40607 void
40608 Inst_FLAT__FLAT_ATOMIC_ADD::completeAcc(GPUDynInstPtr gpuDynInst)
40609 {
40610 if (isAtomicRet()) {
40611 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40612
40613 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40614 if (gpuDynInst->exec_mask[lane]) {
40615 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40616 gpuDynInst->d_data))[lane];
40617 }
40618 }
40619
40620 vdst.write();
40621 }
40622 } // completeAcc
40623
40624 Inst_FLAT__FLAT_ATOMIC_SUB::Inst_FLAT__FLAT_ATOMIC_SUB(InFmt_FLAT *iFmt)
40625 : Inst_FLAT(iFmt, "flat_atomic_sub")
40626 {
40627 setFlag(AtomicSub);
40628 if (instData.GLC) {
40629 setFlag(AtomicReturn);
40630 } else {
40631 setFlag(AtomicNoReturn);
40632 } // if
40633 setFlag(MemoryRef);
40634 } // Inst_FLAT__FLAT_ATOMIC_SUB
40635
40636 Inst_FLAT__FLAT_ATOMIC_SUB::~Inst_FLAT__FLAT_ATOMIC_SUB()
40637 {
40638 } // ~Inst_FLAT__FLAT_ATOMIC_SUB
40639
40640 // tmp = MEM[ADDR];
40641 // MEM[ADDR] -= DATA;
40642 // RETURN_DATA = tmp.
40643 void
40644 Inst_FLAT__FLAT_ATOMIC_SUB::execute(GPUDynInstPtr gpuDynInst)
40645 {
40646 Wavefront *wf = gpuDynInst->wavefront();
40647
40648 if (wf->execMask().none()) {
40649 wf->decVMemInstsIssued();
40650 wf->decLGKMInstsIssued();
40651 wf->wrGmReqsInPipe--;
40652 wf->rdGmReqsInPipe--;
40653 return;
40654 }
40655
40656 gpuDynInst->execUnitId = wf->execUnitId;
40657 gpuDynInst->exec_mask = wf->execMask();
40658 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40659 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40660
40661 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40662 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40663
40664 addr.read();
40665 data.read();
40666
40667 calcAddr(gpuDynInst, addr);
40668
40669 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40670 if (gpuDynInst->exec_mask[lane]) {
40671 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40672 = data[lane];
40673 }
40674 }
40675
40676 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40677 gpuDynInst->computeUnit()->globalMemoryPipe.
40678 issueRequest(gpuDynInst);
40679 wf->wrGmReqsInPipe--;
40680 wf->outstandingReqsWrGm++;
40681 wf->rdGmReqsInPipe--;
40682 wf->outstandingReqsRdGm++;
40683 } else {
40684 fatal("Non global flat instructions not implemented yet.\n");
40685 }
40686
40687 gpuDynInst->wavefront()->outstandingReqs++;
40688 gpuDynInst->wavefront()->validateRequestCounters();
40689 }

40690         void
40691 Inst_FLAT__FLAT_ATOMIC_SUB::initiateAcc(GPUDynInstPtr gpuDynInst)
40692 {
40693 initAtomicAccess<VecElemU32>(gpuDynInst);
40694 } // initiateAcc
40695
40696 void
40697 Inst_FLAT__FLAT_ATOMIC_SUB::completeAcc(GPUDynInstPtr gpuDynInst)
40698 {
40699 if (isAtomicRet()) {
40700 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40701
40702 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40703 if (gpuDynInst->exec_mask[lane]) {
40704 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40705 gpuDynInst->d_data))[lane];
40706 }
40707 }
40708
40709 vdst.write();
40710 }
40711 } // completeAcc
40712
40713 Inst_FLAT__FLAT_ATOMIC_SMIN::Inst_FLAT__FLAT_ATOMIC_SMIN(InFmt_FLAT *iFmt)
40714 : Inst_FLAT(iFmt, "flat_atomic_smin")
40715 {
40716 setFlag(AtomicMin);
40717 if (instData.GLC) {
40718 setFlag(AtomicReturn);
40719 } else {
40720 setFlag(AtomicNoReturn);
40721 }
40722 setFlag(MemoryRef);
40723 } // Inst_FLAT__FLAT_ATOMIC_SMIN
40724
40725 Inst_FLAT__FLAT_ATOMIC_SMIN::~Inst_FLAT__FLAT_ATOMIC_SMIN()
40726 {
40727 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN
40728
40729 // tmp = MEM[ADDR];
40730 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (signed compare);
40731 // RETURN_DATA = tmp.
40732 void
40733 Inst_FLAT__FLAT_ATOMIC_SMIN::execute(GPUDynInstPtr gpuDynInst)
40734 {
40735 panicUnimplemented();
40736 }
40737
40738 Inst_FLAT__FLAT_ATOMIC_UMIN::Inst_FLAT__FLAT_ATOMIC_UMIN(InFmt_FLAT *iFmt)
40739 : Inst_FLAT(iFmt, "flat_atomic_umin")
40740 {
40741 setFlag(AtomicMin);
40742 if (instData.GLC) {
40743 setFlag(AtomicReturn);
40744 } else {
40745 setFlag(AtomicNoReturn);
40746 }
40747 setFlag(MemoryRef);
40748 } // Inst_FLAT__FLAT_ATOMIC_UMIN
40749
40750 Inst_FLAT__FLAT_ATOMIC_UMIN::~Inst_FLAT__FLAT_ATOMIC_UMIN()
40751 {
40752 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN
40753
40754 // tmp = MEM[ADDR];
40755 // MEM[ADDR] = (DATA < tmp) ? DATA : tmp (unsigned compare);
40756 // RETURN_DATA = tmp.
40757 void
40758 Inst_FLAT__FLAT_ATOMIC_UMIN::execute(GPUDynInstPtr gpuDynInst)
40759 {
40760 panicUnimplemented();
40761 }
40762
40763 Inst_FLAT__FLAT_ATOMIC_SMAX::Inst_FLAT__FLAT_ATOMIC_SMAX(InFmt_FLAT *iFmt)
40764 : Inst_FLAT(iFmt, "flat_atomic_smax")
40765 {
40766 setFlag(AtomicMax);
40767 if (instData.GLC) {
40768 setFlag(AtomicReturn);
40769 } else {
40770 setFlag(AtomicNoReturn);
40771 }
40772 setFlag(MemoryRef);
40773 } // Inst_FLAT__FLAT_ATOMIC_SMAX
40774
40775 Inst_FLAT__FLAT_ATOMIC_SMAX::~Inst_FLAT__FLAT_ATOMIC_SMAX()
40776 {
40777 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX
40778
40779 // tmp = MEM[ADDR];
40780 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (signed compare);
40781 // RETURN_DATA = tmp.
40782 void
40783 Inst_FLAT__FLAT_ATOMIC_SMAX::execute(GPUDynInstPtr gpuDynInst)
40784 {
40785 panicUnimplemented();
40786 }
40787
40788 Inst_FLAT__FLAT_ATOMIC_UMAX::Inst_FLAT__FLAT_ATOMIC_UMAX(InFmt_FLAT *iFmt)
40789 : Inst_FLAT(iFmt, "flat_atomic_umax")
40790 {
40791 setFlag(AtomicMax);
40792 if (instData.GLC) {
40793 setFlag(AtomicReturn);
40794 } else {
40795 setFlag(AtomicNoReturn);
40796 }
40797 setFlag(MemoryRef);
40798 } // Inst_FLAT__FLAT_ATOMIC_UMAX
40799
40800 Inst_FLAT__FLAT_ATOMIC_UMAX::~Inst_FLAT__FLAT_ATOMIC_UMAX()
40801 {
40802 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX
40803
40804 // tmp = MEM[ADDR];
40805 // MEM[ADDR] = (DATA > tmp) ? DATA : tmp (unsigned compare);
40806 // RETURN_DATA = tmp.
40807 void
40808 Inst_FLAT__FLAT_ATOMIC_UMAX::execute(GPUDynInstPtr gpuDynInst)
40809 {
40810 panicUnimplemented();
40811 }
40812
40813 Inst_FLAT__FLAT_ATOMIC_AND::Inst_FLAT__FLAT_ATOMIC_AND(InFmt_FLAT *iFmt)
40814 : Inst_FLAT(iFmt, "flat_atomic_and")
40815 {
40816 setFlag(AtomicAnd);
40817 if (instData.GLC) {
40818 setFlag(AtomicReturn);
40819 } else {
40820 setFlag(AtomicNoReturn);
40821 }
40822 setFlag(MemoryRef);
40823 } // Inst_FLAT__FLAT_ATOMIC_AND
40824
40825 Inst_FLAT__FLAT_ATOMIC_AND::~Inst_FLAT__FLAT_ATOMIC_AND()
40826 {
40827 } // ~Inst_FLAT__FLAT_ATOMIC_AND
40828
40829 // tmp = MEM[ADDR];
40830 // MEM[ADDR] &= DATA;
40831 // RETURN_DATA = tmp.
40832 void
40833 Inst_FLAT__FLAT_ATOMIC_AND::execute(GPUDynInstPtr gpuDynInst)
40834 {
40835 panicUnimplemented();
40836 }
40837
40838 Inst_FLAT__FLAT_ATOMIC_OR::Inst_FLAT__FLAT_ATOMIC_OR(InFmt_FLAT *iFmt)
40839 : Inst_FLAT(iFmt, "flat_atomic_or")
40840 {
40841 setFlag(AtomicOr);
40842 if (instData.GLC) {
40843 setFlag(AtomicReturn);
40844 } else {
40845 setFlag(AtomicNoReturn);
40846 }
40847 setFlag(MemoryRef);
40848 } // Inst_FLAT__FLAT_ATOMIC_OR
40849
40850 Inst_FLAT__FLAT_ATOMIC_OR::~Inst_FLAT__FLAT_ATOMIC_OR()
40851 {
40852 } // ~Inst_FLAT__FLAT_ATOMIC_OR
40853
40854 // tmp = MEM[ADDR];
40855 // MEM[ADDR] |= DATA;
40856 // RETURN_DATA = tmp.
40857 void
40858 Inst_FLAT__FLAT_ATOMIC_OR::execute(GPUDynInstPtr gpuDynInst)
40859 {
40860 panicUnimplemented();
40861 }
40862
40863 Inst_FLAT__FLAT_ATOMIC_XOR::Inst_FLAT__FLAT_ATOMIC_XOR(InFmt_FLAT *iFmt)
40864 : Inst_FLAT(iFmt, "flat_atomic_xor")
40865 {
40866 setFlag(AtomicXor);
40867 if (instData.GLC) {
40868 setFlag(AtomicReturn);
40869 } else {
40870 setFlag(AtomicNoReturn);
40871 }
40872 setFlag(MemoryRef);
40873 } // Inst_FLAT__FLAT_ATOMIC_XOR
40874
40875 Inst_FLAT__FLAT_ATOMIC_XOR::~Inst_FLAT__FLAT_ATOMIC_XOR()
40876 {
40877 } // ~Inst_FLAT__FLAT_ATOMIC_XOR
40878
40879 // tmp = MEM[ADDR];
40880 // MEM[ADDR] ^= DATA;
40881 // RETURN_DATA = tmp.
40882 void
40883 Inst_FLAT__FLAT_ATOMIC_XOR::execute(GPUDynInstPtr gpuDynInst)
40884 {
40885 panicUnimplemented();
40886 }
40887
40888 Inst_FLAT__FLAT_ATOMIC_INC::Inst_FLAT__FLAT_ATOMIC_INC(InFmt_FLAT *iFmt)
40889 : Inst_FLAT(iFmt, "flat_atomic_inc")
40890 {
40891 setFlag(AtomicInc);
40892 if (instData.GLC) {
40893 setFlag(AtomicReturn);
40894 } else {
40895 setFlag(AtomicNoReturn);
40896 }
40897 setFlag(MemoryRef);
40898 } // Inst_FLAT__FLAT_ATOMIC_INC
40899
40900 Inst_FLAT__FLAT_ATOMIC_INC::~Inst_FLAT__FLAT_ATOMIC_INC()
40901 {
40902 } // ~Inst_FLAT__FLAT_ATOMIC_INC
40903
40904 // tmp = MEM[ADDR];
40905 // MEM[ADDR] = (tmp >= DATA) ? 0 : tmp + 1 (unsigned compare);
40906 // RETURN_DATA = tmp.
40907 void
40908 Inst_FLAT__FLAT_ATOMIC_INC::execute(GPUDynInstPtr gpuDynInst)
40909 {
40910 Wavefront *wf = gpuDynInst->wavefront();
40911
40912 if (wf->execMask().none()) {
40913 wf->decVMemInstsIssued();
40914 wf->decLGKMInstsIssued();
40915 wf->wrGmReqsInPipe--;
40916 wf->rdGmReqsInPipe--;
40917 return;
40918 }
40919
40920 gpuDynInst->execUnitId = wf->execUnitId;
40921 gpuDynInst->exec_mask = wf->execMask();
40922 gpuDynInst->latency.init(gpuDynInst->computeUnit());
40923 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
40924
40925 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
40926 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
40927
40928 addr.read();
40929 data.read();
40930
40931 calcAddr(gpuDynInst, addr);
40932
40933 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40934 if (gpuDynInst->exec_mask[lane]) {
40935 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
40936 = data[lane];
40937 }
40938 }
40939
40940 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
40941 gpuDynInst->computeUnit()->globalMemoryPipe.
40942 issueRequest(gpuDynInst);
40943 wf->wrGmReqsInPipe--;
40944 wf->outstandingReqsWrGm++;
40945 wf->rdGmReqsInPipe--;
40946 wf->outstandingReqsRdGm++;
40947 } else {
40948 fatal("Non global flat instructions not implemented yet.\n");
40949 }
40950
40951 gpuDynInst->wavefront()->outstandingReqs++;
40952 gpuDynInst->wavefront()->validateRequestCounters();
40953 }
40954
40955 void
40956 Inst_FLAT__FLAT_ATOMIC_INC::initiateAcc(GPUDynInstPtr gpuDynInst)
40957 {
40958 initAtomicAccess<VecElemU32>(gpuDynInst);
40959 } // initiateAcc
40960
40961 void
40962 Inst_FLAT__FLAT_ATOMIC_INC::completeAcc(GPUDynInstPtr gpuDynInst)
40963 {
40964 if (isAtomicRet()) {
40965 VecOperandU32 vdst(gpuDynInst, extData.VDST);
40966
40967 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
40968 if (gpuDynInst->exec_mask[lane]) {
40969 vdst[lane] = (reinterpret_cast<VecElemU32*>(
40970 gpuDynInst->d_data))[lane];
40971 }
40972 }
40973
40974 vdst.write();
40975 }
40976 } // completeAcc
40977
40978 Inst_FLAT__FLAT_ATOMIC_DEC::Inst_FLAT__FLAT_ATOMIC_DEC(InFmt_FLAT *iFmt)
40979 : Inst_FLAT(iFmt, "flat_atomic_dec")
40980 {
40981 setFlag(AtomicDec);
40982 if (instData.GLC) {
40983 setFlag(AtomicReturn);
40984 } else {
40985 setFlag(AtomicNoReturn);
40986 }
40987 setFlag(MemoryRef);
40988 } // Inst_FLAT__FLAT_ATOMIC_DEC
40989
40990 Inst_FLAT__FLAT_ATOMIC_DEC::~Inst_FLAT__FLAT_ATOMIC_DEC()
40991 {
40992 } // ~Inst_FLAT__FLAT_ATOMIC_DEC
40993
40994 // tmp = MEM[ADDR];
40995 // MEM[ADDR] = (tmp == 0 || tmp > DATA) ? DATA : tmp - 1
40996         // (unsigned compare);
              // RETURN_DATA = tmp.
40997 void
40998 Inst_FLAT__FLAT_ATOMIC_DEC::execute(GPUDynInstPtr gpuDynInst)
40999 {
41000 Wavefront *wf = gpuDynInst->wavefront();
41001
41002 if (wf->execMask().none()) {
41003 wf->decVMemInstsIssued();
41004 wf->decLGKMInstsIssued();
41005 wf->wrGmReqsInPipe--;
41006 wf->rdGmReqsInPipe--;
41007 return;
41008 }
41009
41010 gpuDynInst->execUnitId = wf->execUnitId;
41011 gpuDynInst->exec_mask = wf->execMask();
41012 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41013 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41014
41015 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41016 ConstVecOperandU32 data(gpuDynInst, extData.DATA);
41017
41018 addr.read();
41019 data.read();
41020
41021 calcAddr(gpuDynInst, addr);
41022
41023 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41024 if (gpuDynInst->exec_mask[lane]) {
41025 (reinterpret_cast<VecElemU32*>(gpuDynInst->a_data))[lane]
41026 = data[lane];
41027 }
41028 }
41029
41030 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41031 gpuDynInst->computeUnit()->globalMemoryPipe.
41032 issueRequest(gpuDynInst);
41033 wf->wrGmReqsInPipe--;
41034 wf->outstandingReqsWrGm++;
41035 wf->rdGmReqsInPipe--;
41036 wf->outstandingReqsRdGm++;
41037 } else {
41038 fatal("Non global flat instructions not implemented yet.\n");
41039 }
41040
41041 gpuDynInst->wavefront()->outstandingReqs++;
41042 gpuDynInst->wavefront()->validateRequestCounters();
41043 }
41044
41045 void
41046 Inst_FLAT__FLAT_ATOMIC_DEC::initiateAcc(GPUDynInstPtr gpuDynInst)
41047 {
41048 initAtomicAccess<VecElemU32>(gpuDynInst);
41049 } // initiateAcc
41050
41051 void
41052 Inst_FLAT__FLAT_ATOMIC_DEC::completeAcc(GPUDynInstPtr gpuDynInst)
41053 {
41054 if (isAtomicRet()) {
41055 VecOperandU32 vdst(gpuDynInst, extData.VDST);
41056
41057 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41058 if (gpuDynInst->exec_mask[lane]) {
41059 vdst[lane] = (reinterpret_cast<VecElemU32*>(
41060 gpuDynInst->d_data))[lane];
41061 }
41062 }
41063
41064 vdst.write();
41065 }
41066 } // completeAcc
41067
41068 Inst_FLAT__FLAT_ATOMIC_SWAP_X2::Inst_FLAT__FLAT_ATOMIC_SWAP_X2(
41069 InFmt_FLAT *iFmt)
41070 : Inst_FLAT(iFmt, "flat_atomic_swap_x2")
41071 {
41072 setFlag(AtomicExch);
41073 if (instData.GLC) {
41074 setFlag(AtomicReturn);
41075 } else {
41076 setFlag(AtomicNoReturn);
41077 }
41078 setFlag(MemoryRef);
41079 } // Inst_FLAT__FLAT_ATOMIC_SWAP_X2
41080
41081 Inst_FLAT__FLAT_ATOMIC_SWAP_X2::~Inst_FLAT__FLAT_ATOMIC_SWAP_X2()
41082 {
41083 } // ~Inst_FLAT__FLAT_ATOMIC_SWAP_X2
41084
41085 // tmp = MEM[ADDR];
41086 // MEM[ADDR] = DATA[0:1];
41087 // RETURN_DATA[0:1] = tmp.
41088 void
41089 Inst_FLAT__FLAT_ATOMIC_SWAP_X2::execute(GPUDynInstPtr gpuDynInst)
41090 {
41091 panicUnimplemented();
41092 }
41093
41094 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2(
41095 InFmt_FLAT *iFmt)
41096 : Inst_FLAT(iFmt, "flat_atomic_cmpswap_x2")
41097 {
41098 setFlag(AtomicCAS);
41099 if (instData.GLC) {
41100 setFlag(AtomicReturn);
41101 } else {
41102 setFlag(AtomicNoReturn);
41103 }
41104 setFlag(MemoryRef);
41105 } // Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
41106
41107 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2()
41108 {
41109 } // ~Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2
41110
41111 // tmp = MEM[ADDR];
41112 // src = DATA[0:1];
41113 // cmp = DATA[2:3];
41114 // MEM[ADDR] = (tmp == cmp) ? src : tmp;
41115 // RETURN_DATA[0:1] = tmp.
41116 void
41117 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::execute(GPUDynInstPtr gpuDynInst)
41118 {
41119 Wavefront *wf = gpuDynInst->wavefront();
41120
41121 if (wf->execMask().none()) {
41122 wf->decVMemInstsIssued();
41123 wf->decLGKMInstsIssued();
41124 wf->wrGmReqsInPipe--;
41125 wf->rdGmReqsInPipe--;
41126 return;
41127 }
41128
41129 gpuDynInst->execUnitId = wf->execUnitId;
41130 gpuDynInst->exec_mask = wf->execMask();
41131 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41132 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41133
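              // Each 64-bit operand spans two consecutive dwords of the
              // DATA range, so the swap value is DATA[0:1] and the compare
              // value starts two registers up at DATA[2:3] (hence the + 2,
              // versus the + 1 used by the 32-bit cmpswap above).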
41134 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41135 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41136 ConstVecOperandU64 cmp(gpuDynInst, extData.DATA + 2);
41137
41138 addr.read();
41139 data.read();
41140 cmp.read();
41141
41142 calcAddr(gpuDynInst, addr);
41143
41144 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41145 if (gpuDynInst->exec_mask[lane]) {
41146 (reinterpret_cast<VecElemU64*>(gpuDynInst->x_data))[lane]
41147 = data[lane];
41148 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41149 = cmp[lane];
41150 }
41151 }
41152
41153 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL ||
41154 gpuDynInst->executedAs() == Enums::SC_PRIVATE) {
41155             /**
41156              * TODO: If you encounter this panic, simply remove it and
41157              * restart the simulation. It should work fine; the panic
41158              * only warns the user that this path has never been
41159              * tested, although all the necessary logic is implemented.
41160              */
41161 panic_if(gpuDynInst->executedAs() == Enums::SC_PRIVATE,
41162 "Flats to private aperture not tested yet\n");
41163 gpuDynInst->computeUnit()->globalMemoryPipe.
41164 issueRequest(gpuDynInst);
41165 wf->wrGmReqsInPipe--;
41166 wf->outstandingReqsWrGm++;
41167 wf->rdGmReqsInPipe--;
41168 wf->outstandingReqsRdGm++;
41169 } else {
41170 fatal("Non global flat instructions not implemented yet.\n");
41171 }
41172
41173 gpuDynInst->wavefront()->outstandingReqs++;
41174 gpuDynInst->wavefront()->validateRequestCounters();
41175 }
41176
41177 void
41178 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41179 {
41180 initAtomicAccess<VecElemU64>(gpuDynInst);
41181 } // initiateAcc
41182
41183 void
41184 Inst_FLAT__FLAT_ATOMIC_CMPSWAP_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41185 {
41186 if (isAtomicRet()) {
41187 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41188
41189 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41190 if (gpuDynInst->exec_mask[lane]) {
41191 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41192 gpuDynInst->d_data))[lane];
41193 }
41194 }
41195
41196 vdst.write();
41197 }
41198 } // completeAcc
41199
41200 Inst_FLAT__FLAT_ATOMIC_ADD_X2::Inst_FLAT__FLAT_ATOMIC_ADD_X2(
41201 InFmt_FLAT *iFmt)
41202 : Inst_FLAT(iFmt, "flat_atomic_add_x2")
41203 {
41204 setFlag(AtomicAdd);
41205 if (instData.GLC) {
41206 setFlag(AtomicReturn);
41207 } else {
41208 setFlag(AtomicNoReturn);
41209 }
41210 setFlag(MemoryRef);
41211 } // Inst_FLAT__FLAT_ATOMIC_ADD_X2
41212
41213 Inst_FLAT__FLAT_ATOMIC_ADD_X2::~Inst_FLAT__FLAT_ATOMIC_ADD_X2()
41214 {
41215 } // ~Inst_FLAT__FLAT_ATOMIC_ADD_X2
41216
41217 // tmp = MEM[ADDR];
41218 // MEM[ADDR] += DATA[0:1];
41219 // RETURN_DATA[0:1] = tmp.
41220 void
41221 Inst_FLAT__FLAT_ATOMIC_ADD_X2::execute(GPUDynInstPtr gpuDynInst)
41222 {
41223 Wavefront *wf = gpuDynInst->wavefront();
41224
41225 if (wf->execMask().none()) {
41226 wf->decVMemInstsIssued();
41227 wf->decLGKMInstsIssued();
41228 wf->wrGmReqsInPipe--;
41229 wf->rdGmReqsInPipe--;
41230 return;
41231 }
41232
41233 gpuDynInst->execUnitId = wf->execUnitId;
41234 gpuDynInst->exec_mask = wf->execMask();
41235 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41236 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41237
41238 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41239 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41240
41241 addr.read();
41242 data.read();
41243
41244 calcAddr(gpuDynInst, addr);
41245
41246 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41247 if (gpuDynInst->exec_mask[lane]) {
41248 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41249 = data[lane];
41250 }
41251 }
41252
41253 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41254 gpuDynInst->computeUnit()->globalMemoryPipe.
41255 issueRequest(gpuDynInst);
41256 wf->wrGmReqsInPipe--;
41257 wf->outstandingReqsWrGm++;
41258 wf->rdGmReqsInPipe--;
41259 wf->outstandingReqsRdGm++;
41260 } else {
41261 fatal("Non global flat instructions not implemented yet.\n");
41262 }
41263
41264 gpuDynInst->wavefront()->outstandingReqs++;
41265 gpuDynInst->wavefront()->validateRequestCounters();
41266 }
41267
41268 void
41269 Inst_FLAT__FLAT_ATOMIC_ADD_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41270 {
41271 initAtomicAccess<VecElemU64>(gpuDynInst);
41272 } // initiateAcc
41273
41274 void
41275 Inst_FLAT__FLAT_ATOMIC_ADD_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41276 {
41277 if (isAtomicRet()) {
41278 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41279
41281 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41282 if (gpuDynInst->exec_mask[lane]) {
41283 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41284 gpuDynInst->d_data))[lane];
41285 }
41286 }
41287
41288 vdst.write();
41289 }
41290 } // completeAcc
41291
41292 Inst_FLAT__FLAT_ATOMIC_SUB_X2::Inst_FLAT__FLAT_ATOMIC_SUB_X2(
41293 InFmt_FLAT *iFmt)
41294 : Inst_FLAT(iFmt, "flat_atomic_sub_x2")
41295 {
41296 setFlag(AtomicSub);
41297 if (instData.GLC) {
41298 setFlag(AtomicReturn);
41299 } else {
41300 setFlag(AtomicNoReturn);
41301 }
41302 setFlag(MemoryRef);
41303 } // Inst_FLAT__FLAT_ATOMIC_SUB_X2
41304
41305 Inst_FLAT__FLAT_ATOMIC_SUB_X2::~Inst_FLAT__FLAT_ATOMIC_SUB_X2()
41306 {
41307 } // ~Inst_FLAT__FLAT_ATOMIC_SUB_X2
41308
41309 // tmp = MEM[ADDR];
41310 // MEM[ADDR] -= DATA[0:1];
41311 // RETURN_DATA[0:1] = tmp.
41312 void
41313 Inst_FLAT__FLAT_ATOMIC_SUB_X2::execute(GPUDynInstPtr gpuDynInst)
41314 {
41315 Wavefront *wf = gpuDynInst->wavefront();
41316
41317 if (wf->execMask().none()) {
41318 wf->decVMemInstsIssued();
41319 wf->decLGKMInstsIssued();
41320 wf->wrGmReqsInPipe--;
41321 wf->rdGmReqsInPipe--;
41322 return;
41323 }
41324
41325 gpuDynInst->execUnitId = wf->execUnitId;
41326 gpuDynInst->exec_mask = wf->execMask();
41327 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41328 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41329
41330 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41331 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41332
41333 addr.read();
41334 data.read();
41335
41336 calcAddr(gpuDynInst, addr);
41337
41338 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41339 if (gpuDynInst->exec_mask[lane]) {
41340 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41341 = data[lane];
41342 }
41343 }
41344
41345 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41346 gpuDynInst->computeUnit()->globalMemoryPipe.
41347 issueRequest(gpuDynInst);
41348 wf->wrGmReqsInPipe--;
41349 wf->outstandingReqsWrGm++;
41350 wf->rdGmReqsInPipe--;
41351 wf->outstandingReqsRdGm++;
41352 } else {
41353 fatal("Non global flat instructions not implemented yet.\n");
41354 }
41355
41356 gpuDynInst->wavefront()->outstandingReqs++;
41357 gpuDynInst->wavefront()->validateRequestCounters();
41358 }
41359
41360 void
41361 Inst_FLAT__FLAT_ATOMIC_SUB_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41362 {
41363 initAtomicAccess<VecElemU64>(gpuDynInst);
41364 } // initiateAcc
41365
41366 void
41367 Inst_FLAT__FLAT_ATOMIC_SUB_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41368 {
41369 if (isAtomicRet()) {
41370 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41371
41373 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41374 if (gpuDynInst->exec_mask[lane]) {
41375 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41376 gpuDynInst->d_data))[lane];
41377 }
41378 }
41379
41380 vdst.write();
41381 }
41382 } // completeAcc
41383
41384 Inst_FLAT__FLAT_ATOMIC_SMIN_X2::Inst_FLAT__FLAT_ATOMIC_SMIN_X2(
41385 InFmt_FLAT *iFmt)
41386 : Inst_FLAT(iFmt, "flat_atomic_smin_x2")
41387 {
41388 setFlag(AtomicMin);
41389 if (instData.GLC) {
41390 setFlag(AtomicReturn);
41391 } else {
41392 setFlag(AtomicNoReturn);
41393 }
41394 setFlag(MemoryRef);
41395 } // Inst_FLAT__FLAT_ATOMIC_SMIN_X2
41396
41397 Inst_FLAT__FLAT_ATOMIC_SMIN_X2::~Inst_FLAT__FLAT_ATOMIC_SMIN_X2()
41398 {
41399 } // ~Inst_FLAT__FLAT_ATOMIC_SMIN_X2
41400
41401 // tmp = MEM[ADDR];
41402         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (signed compare);
41403 // RETURN_DATA[0:1] = tmp.
41404 void
41405 Inst_FLAT__FLAT_ATOMIC_SMIN_X2::execute(GPUDynInstPtr gpuDynInst)
41406 {
41407 panicUnimplemented();
41408 }
41409
41410 Inst_FLAT__FLAT_ATOMIC_UMIN_X2::Inst_FLAT__FLAT_ATOMIC_UMIN_X2(
41411 InFmt_FLAT *iFmt)
41412 : Inst_FLAT(iFmt, "flat_atomic_umin_x2")
41413 {
41414 setFlag(AtomicMin);
41415 if (instData.GLC) {
41416 setFlag(AtomicReturn);
41417 } else {
41418 setFlag(AtomicNoReturn);
41419 }
41420 setFlag(MemoryRef);
41421 } // Inst_FLAT__FLAT_ATOMIC_UMIN_X2
41422
41423 Inst_FLAT__FLAT_ATOMIC_UMIN_X2::~Inst_FLAT__FLAT_ATOMIC_UMIN_X2()
41424 {
41425 } // ~Inst_FLAT__FLAT_ATOMIC_UMIN_X2
41426
41427 // tmp = MEM[ADDR];
41428         // MEM[ADDR] = (DATA[0:1] < tmp) ? DATA[0:1] : tmp (unsigned compare);
41429 // RETURN_DATA[0:1] = tmp.
41430 void
41431 Inst_FLAT__FLAT_ATOMIC_UMIN_X2::execute(GPUDynInstPtr gpuDynInst)
41432 {
41433 panicUnimplemented();
41434 }
41435
41436 Inst_FLAT__FLAT_ATOMIC_SMAX_X2::Inst_FLAT__FLAT_ATOMIC_SMAX_X2(
41437 InFmt_FLAT *iFmt)
41438 : Inst_FLAT(iFmt, "flat_atomic_smax_x2")
41439 {
41440 setFlag(AtomicMax);
41441 if (instData.GLC) {
41442 setFlag(AtomicReturn);
41443 } else {
41444 setFlag(AtomicNoReturn);
41445 }
41446 setFlag(MemoryRef);
41447 } // Inst_FLAT__FLAT_ATOMIC_SMAX_X2
41448
41449 Inst_FLAT__FLAT_ATOMIC_SMAX_X2::~Inst_FLAT__FLAT_ATOMIC_SMAX_X2()
41450 {
41451 } // ~Inst_FLAT__FLAT_ATOMIC_SMAX_X2
41452
41453 // tmp = MEM[ADDR];
41454         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (signed compare);
41455 // RETURN_DATA[0:1] = tmp.
41456 void
41457 Inst_FLAT__FLAT_ATOMIC_SMAX_X2::execute(GPUDynInstPtr gpuDynInst)
41458 {
41459 panicUnimplemented();
41460 }
41461
41462 Inst_FLAT__FLAT_ATOMIC_UMAX_X2::Inst_FLAT__FLAT_ATOMIC_UMAX_X2(
41463 InFmt_FLAT *iFmt)
41464 : Inst_FLAT(iFmt, "flat_atomic_umax_x2")
41465 {
41466 setFlag(AtomicMax);
41467 if (instData.GLC) {
41468 setFlag(AtomicReturn);
41469 } else {
41470 setFlag(AtomicNoReturn);
41471 }
41472 setFlag(MemoryRef);
41473 } // Inst_FLAT__FLAT_ATOMIC_UMAX_X2
41474
41475 Inst_FLAT__FLAT_ATOMIC_UMAX_X2::~Inst_FLAT__FLAT_ATOMIC_UMAX_X2()
41476 {
41477 } // ~Inst_FLAT__FLAT_ATOMIC_UMAX_X2
41478
41479 // tmp = MEM[ADDR];
41480         // MEM[ADDR] = (DATA[0:1] > tmp) ? DATA[0:1] : tmp (unsigned compare);
41481 // RETURN_DATA[0:1] = tmp.
41482 void
41483 Inst_FLAT__FLAT_ATOMIC_UMAX_X2::execute(GPUDynInstPtr gpuDynInst)
41484 {
41485 panicUnimplemented();
41486 }
41487
41488 Inst_FLAT__FLAT_ATOMIC_AND_X2::Inst_FLAT__FLAT_ATOMIC_AND_X2(
41489 InFmt_FLAT *iFmt)
41490 : Inst_FLAT(iFmt, "flat_atomic_and_x2")
41491 {
41492 setFlag(AtomicAnd);
41493 if (instData.GLC) {
41494 setFlag(AtomicReturn);
41495 } else {
41496 setFlag(AtomicNoReturn);
41497 }
41498 setFlag(MemoryRef);
41499 } // Inst_FLAT__FLAT_ATOMIC_AND_X2
41500
41501 Inst_FLAT__FLAT_ATOMIC_AND_X2::~Inst_FLAT__FLAT_ATOMIC_AND_X2()
41502 {
41503 } // ~Inst_FLAT__FLAT_ATOMIC_AND_X2
41504
41505 // tmp = MEM[ADDR];
41506 // MEM[ADDR] &= DATA[0:1];
41507 // RETURN_DATA[0:1] = tmp.
41508 void
41509 Inst_FLAT__FLAT_ATOMIC_AND_X2::execute(GPUDynInstPtr gpuDynInst)
41510 {
41511 panicUnimplemented();
41512 }
41513
41514 Inst_FLAT__FLAT_ATOMIC_OR_X2::Inst_FLAT__FLAT_ATOMIC_OR_X2(
41515 InFmt_FLAT *iFmt)
41516 : Inst_FLAT(iFmt, "flat_atomic_or_x2")
41517 {
41518 setFlag(AtomicOr);
41519 if (instData.GLC) {
41520 setFlag(AtomicReturn);
41521 } else {
41522 setFlag(AtomicNoReturn);
41523 }
41524 setFlag(MemoryRef);
41525 } // Inst_FLAT__FLAT_ATOMIC_OR_X2
41526
41527 Inst_FLAT__FLAT_ATOMIC_OR_X2::~Inst_FLAT__FLAT_ATOMIC_OR_X2()
41528 {
41529 } // ~Inst_FLAT__FLAT_ATOMIC_OR_X2
41530
41531 // tmp = MEM[ADDR];
41532 // MEM[ADDR] |= DATA[0:1];
41533 // RETURN_DATA[0:1] = tmp.
41534 void
41535 Inst_FLAT__FLAT_ATOMIC_OR_X2::execute(GPUDynInstPtr gpuDynInst)
41536 {
41537 panicUnimplemented();
41538 }
41539
41540 Inst_FLAT__FLAT_ATOMIC_XOR_X2::Inst_FLAT__FLAT_ATOMIC_XOR_X2(
41541 InFmt_FLAT *iFmt)
41542 : Inst_FLAT(iFmt, "flat_atomic_xor_x2")
41543 {
41544 setFlag(AtomicXor);
41545 if (instData.GLC) {
41546 setFlag(AtomicReturn);
41547 } else {
41548 setFlag(AtomicNoReturn);
41549 }
41550 setFlag(MemoryRef);
41551 } // Inst_FLAT__FLAT_ATOMIC_XOR_X2
41552
41553 Inst_FLAT__FLAT_ATOMIC_XOR_X2::~Inst_FLAT__FLAT_ATOMIC_XOR_X2()
41554 {
41555 } // ~Inst_FLAT__FLAT_ATOMIC_XOR_X2
41556
41557 // tmp = MEM[ADDR];
41558 // MEM[ADDR] ^= DATA[0:1];
41559 // RETURN_DATA[0:1] = tmp.
41560 void
41561 Inst_FLAT__FLAT_ATOMIC_XOR_X2::execute(GPUDynInstPtr gpuDynInst)
41562 {
41563 panicUnimplemented();
41564 }
41565
41566 Inst_FLAT__FLAT_ATOMIC_INC_X2::Inst_FLAT__FLAT_ATOMIC_INC_X2(
41567 InFmt_FLAT *iFmt)
41568 : Inst_FLAT(iFmt, "flat_atomic_inc_x2")
41569 {
41570 setFlag(AtomicInc);
41571 if (instData.GLC) {
41572 setFlag(AtomicReturn);
41573 } else {
41574 setFlag(AtomicNoReturn);
41575 }
41576 setFlag(MemoryRef);
41577 } // Inst_FLAT__FLAT_ATOMIC_INC_X2
41578
41579 Inst_FLAT__FLAT_ATOMIC_INC_X2::~Inst_FLAT__FLAT_ATOMIC_INC_X2()
41580 {
41581 } // ~Inst_FLAT__FLAT_ATOMIC_INC_X2
41582
41583 // tmp = MEM[ADDR];
41584 // MEM[ADDR] = (tmp >= DATA[0:1]) ? 0 : tmp + 1 (unsigned compare);
41585 // RETURN_DATA[0:1] = tmp.
41586 void
41587 Inst_FLAT__FLAT_ATOMIC_INC_X2::execute(GPUDynInstPtr gpuDynInst)
41588 {
41589 Wavefront *wf = gpuDynInst->wavefront();
41590
41591 if (wf->execMask().none()) {
41592 wf->decVMemInstsIssued();
41593 wf->decLGKMInstsIssued();
41594 wf->wrGmReqsInPipe--;
41595 wf->rdGmReqsInPipe--;
41596 return;
41597 }
41598
41599 gpuDynInst->execUnitId = wf->execUnitId;
41600 gpuDynInst->exec_mask = wf->execMask();
41601 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41602 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41603
41604 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41605 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41606
41607 addr.read();
41608 data.read();
41609
41610 calcAddr(gpuDynInst, addr);
41611
41612 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41613 if (gpuDynInst->exec_mask[lane]) {
41614 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41615 = data[lane];
41616 }
41617 }
41618
41619 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41620 gpuDynInst->computeUnit()->globalMemoryPipe.
41621 issueRequest(gpuDynInst);
41622 wf->wrGmReqsInPipe--;
41623 wf->outstandingReqsWrGm++;
41624 wf->rdGmReqsInPipe--;
41625 wf->outstandingReqsRdGm++;
41626 } else {
41627 fatal("Non global flat instructions not implemented yet.\n");
41628 }
41629
41630 gpuDynInst->wavefront()->outstandingReqs++;
41631 gpuDynInst->wavefront()->validateRequestCounters();
41632 }
41633
41634 void
41635 Inst_FLAT__FLAT_ATOMIC_INC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41636 {
41637 initAtomicAccess<VecElemU64>(gpuDynInst);
41638 } // initiateAcc
41639
41640 void
41641 Inst_FLAT__FLAT_ATOMIC_INC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41642 {
41643 if (isAtomicRet()) {
41644 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41645
41647 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41648 if (gpuDynInst->exec_mask[lane]) {
41649 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41650 gpuDynInst->d_data))[lane];
41651 }
41652 }
41653
41654 vdst.write();
41655 }
41656 } // completeAcc
41657
41658 Inst_FLAT__FLAT_ATOMIC_DEC_X2::Inst_FLAT__FLAT_ATOMIC_DEC_X2(
41659 InFmt_FLAT *iFmt)
41660 : Inst_FLAT(iFmt, "flat_atomic_dec_x2")
41661 {
41662 setFlag(AtomicDec);
41663 if (instData.GLC) {
41664 setFlag(AtomicReturn);
41665 } else {
41666 setFlag(AtomicNoReturn);
41667 }
41668 setFlag(MemoryRef);
41669 } // Inst_FLAT__FLAT_ATOMIC_DEC_X2
41670
41671 Inst_FLAT__FLAT_ATOMIC_DEC_X2::~Inst_FLAT__FLAT_ATOMIC_DEC_X2()
41672 {
41673 } // ~Inst_FLAT__FLAT_ATOMIC_DEC_X2
41674
41675 // tmp = MEM[ADDR];
41676 // MEM[ADDR] = (tmp == 0 || tmp > DATA[0:1]) ? DATA[0:1] : tmp - 1
41677 // (unsigned compare);
41678 // RETURN_DATA[0:1] = tmp.
41679 void
41680 Inst_FLAT__FLAT_ATOMIC_DEC_X2::execute(GPUDynInstPtr gpuDynInst)
41681 {
41682 Wavefront *wf = gpuDynInst->wavefront();
41683
41684 if (wf->execMask().none()) {
41685 wf->decVMemInstsIssued();
41686 wf->decLGKMInstsIssued();
41687 wf->wrGmReqsInPipe--;
41688 wf->rdGmReqsInPipe--;
41689 return;
41690 }
41691
41692 gpuDynInst->execUnitId = wf->execUnitId;
41693 gpuDynInst->exec_mask = wf->execMask();
41694 gpuDynInst->latency.init(gpuDynInst->computeUnit());
41695 gpuDynInst->latency.set(gpuDynInst->computeUnit()->clockPeriod());
41696
41697 ConstVecOperandU64 addr(gpuDynInst, extData.ADDR);
41698 ConstVecOperandU64 data(gpuDynInst, extData.DATA);
41699
41700 addr.read();
41701 data.read();
41702
41703 calcAddr(gpuDynInst, addr);
41704
41705 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41706 if (gpuDynInst->exec_mask[lane]) {
41707 (reinterpret_cast<VecElemU64*>(gpuDynInst->a_data))[lane]
41708 = data[lane];
41709 }
41710 }
41711
41712 if (gpuDynInst->executedAs() == Enums::SC_GLOBAL) {
41713 gpuDynInst->computeUnit()->globalMemoryPipe.
41714 issueRequest(gpuDynInst);
41715 wf->wrGmReqsInPipe--;
41716 wf->outstandingReqsWrGm++;
41717 wf->rdGmReqsInPipe--;
41718 wf->outstandingReqsRdGm++;
41719 } else {
41720 fatal("Non global flat instructions not implemented yet.\n");
41721 }
41722
41723 gpuDynInst->wavefront()->outstandingReqs++;
41724 gpuDynInst->wavefront()->validateRequestCounters();
41725 }
41726
41727 void
41728 Inst_FLAT__FLAT_ATOMIC_DEC_X2::initiateAcc(GPUDynInstPtr gpuDynInst)
41729 {
41730 initAtomicAccess<VecElemU64>(gpuDynInst);
41731 } // initiateAcc
41732
41733 void
41734 Inst_FLAT__FLAT_ATOMIC_DEC_X2::completeAcc(GPUDynInstPtr gpuDynInst)
41735 {
41736 if (isAtomicRet()) {
41737 VecOperandU64 vdst(gpuDynInst, extData.VDST);
41738
41740 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
41741 if (gpuDynInst->exec_mask[lane]) {
41742 vdst[lane] = (reinterpret_cast<VecElemU64*>(
41743 gpuDynInst->d_data))[lane];
41744 }
41745 }
41746
41747 vdst.write();
41748 }
41749 } // completeAcc
41750 } // namespace Gcn3ISA